data management
slug: reference-dataset-statement

The `dataset` statement manages dataset loading and handling across all DAZL steps, providing a unified interface to access different data sources. It acts as the core data provider mechanism for the entire DAZL processing pipeline.
Each step uses the `dataset` parameter to specify its input data:

```yaml
dataset: customerData  # References $work['customerData']
```
```yaml
dataset:
  source: source_identifier
  type: array|sql|api|json|yaml
  options: {}  # Type-specific options
```
### Array source (`type: array`)

```yaml
dataset:
  source: customerData
  type: array
```

### SQL source (`type: sql`)

- `connection`: Database connection identifier
- `parameters`: Query parameters for prepared statements
- `cache`: Cache settings (duration, key)

```yaml
dataset:
  source: "SELECT * FROM customers WHERE region = :region"
  type: sql
  options:
    connection: main_db
    parameters:
      region: "North"
```

### API source (`type: api`)

- `method`: HTTP method (GET, POST, etc.)
- `headers`: HTTP headers
- `body`: Request body for POST/PUT
- `auth`: Authentication details

```yaml
dataset:
  source: "https://api.example.com/v1/products"
  type: api
  options:
    method: GET
    headers:
      Authorization: "Bearer ${API_TOKEN}"
```

### JSON source (`type: json`)

- `path`: Alternate file path specification
- `jsonPath`: JSON path expression for data extraction

```yaml
dataset:
  source: "/data/products.json"
  type: json
  options:
    jsonPath: "$.products[*]"
```

### YAML source (`type: yaml`)

- `path`: Alternate file path specification
- `node`: Path to specific node in YAML document

```yaml
dataset:
  source: "/data/config.yaml"
  type: yaml
  options:
    node: "settings.defaults"
```

```yaml
filter:
  dataset: salesData
  where:
    - "region = 'North'"
    - "sales > 1000"
```
```yaml
chart:
  dataset:
    source: "SELECT product, SUM(revenue) as total FROM sales GROUP BY product ORDER BY total DESC LIMIT 10"
    type: sql
    options:
      connection: reporting_db
  type: bar
  x_axis: product
  y_axis: total
  title: "Top 10 Products by Revenue"
```
```yaml
combine:
  method: join
  datasets:
    customers:
      source: customers
      type: array
    orders:
      source: "SELECT * FROM orders WHERE date >= :start_date"
      type: sql
      options:
        connection: orders_db
        parameters:
          start_date: "2024-01-01"
  join_on:
    left: customer_id
    right: customer_id
```