From 347375c53e69f2b5a4a20880c40a0ef7b12b503d Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Mon, 7 Aug 2023 16:11:59 -0500 Subject: [PATCH] add the schema for SMs and metrics, remove from project yml schema --- schemas/1.6/dbt_project-1.6.json | 32 --- schemas/1.6/dbt_yml_files-1.6.json | 426 +++++++++++++++++++++++------ tests/1.6/invalid/dbt_project.yml | 4 + tests/1.6/valid/dbt_yml_files.yml | 85 +++--- 4 files changed, 395 insertions(+), 152 deletions(-) diff --git a/schemas/1.6/dbt_project-1.6.json b/schemas/1.6/dbt_project-1.6.json index 104a3d3..ee6d54d 100644 --- a/schemas/1.6/dbt_project-1.6.json +++ b/schemas/1.6/dbt_project-1.6.json @@ -48,9 +48,6 @@ "macro-paths": { "$ref": "#/$defs/array_of_strings" }, - "metrics": { - "$ref": "#/$defs/metric_configs" - }, "model-paths": { "$ref": "#/$defs/array_of_strings" }, @@ -221,35 +218,6 @@ }, "additionalProperties": false }, - "metric_configs": { - "title": "Metric configs", - "type": "object", - "description": "Configurations set in the dbt_project.yml file will apply to all metrics that don't have a more specific configuration set.", - "properties": { - "+enabled": { - "$ref": "#/$defs/boolean_or_jinja_string" - }, - "+treat_null_values_as_zero": { - "$ref": "#/$defs/boolean_or_jinja_string" - }, - "enabled": { - "$ref": "#/$defs/boolean_or_jinja_string" - }, - "treat_null_values_as_zero": { - "$ref": "#/$defs/boolean_or_jinja_string" - } - }, - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/$defs/metric_configs" - }, - { - "$ref": "#/$defs/empty_directory" - } - ] - } - }, "model_configs": { "title": "Model configs", "type": "object", diff --git a/schemas/1.6/dbt_yml_files-1.6.json b/schemas/1.6/dbt_yml_files-1.6.json index 4bceb34..14693a6 100644 --- a/schemas/1.6/dbt_yml_files-1.6.json +++ b/schemas/1.6/dbt_yml_files-1.6.json @@ -188,102 +188,35 @@ "type": "array", "items": { "type": "object", - "required": ["name", "label", "calculation_method", "expression"], - "if": { - "properties": { - 
"calculation_method": { - "const": "derived" - } - } - }, - "then": { - "required": ["name", "label", "calculation_method", "expression"] - }, - "else": { - "required": [ - "name", - "model", - "label", - "calculation_method", - "expression" - ] - }, + "required": ["name", "label", "type", "type_params"], "properties": { - "config": { - "type": "object", - "properties": { - "enabled": { - "$ref": "#/$defs/boolean_or_jinja_string" - }, - "treat_null_values_as_zero": { - "$ref": "#/$defs/boolean_or_jinja_string" - }, - "group": { - "type": "string" - } - } - }, - "calculation_method": { - "type": "string" - }, "description": { "type": "string" }, - "dimensions": { - "$ref": "#/$defs/array_of_strings" - }, - "expression": { + "filter": { "type": "string" }, - "filters": { - "type": "array", - "items": { - "type": "object", - "required": ["field", "operator", "value"], - "properties": { - "field": { - "type": "string" - }, - "operator": { - "type": "string" - }, - "value": { - "type": "string" - } - }, - "additionalProperties": false - } - }, "label": { "type": "string" }, - "meta": { - "type": "object" - }, - "model": { - "type": "string", - "default": "ref('')" - }, "name": { + "pattern": "(?!.*__).*^[a-z][a-z0-9_]*[a-z0-9]$", "type": "string" }, - "time_grains": { - "$ref": "#/$defs/array_of_strings" - }, - "timestamp": { - "type": "string" - }, - "window": { - "type": "object", - "properties": { - "count": { - "type": "integer" - }, - "period": { - "type": "string", - "enum": ["day", "week", "month", "year"] - } - } + "type": { + "enum": [ + "SIMPLE", + "RATIO", + "CUMULATIVE", + "DERIVED", + "simple", + "ratio", + "cumulative", + "derived" + ] + }, + "type_params": { + "$ref": "#/$defs/metric_type_params" } }, "additionalProperties": false @@ -431,6 +364,57 @@ "additionalProperties": false } }, + "semantic_models": { + "type": "array", + "items": { + "type": "object", + "required": ["name", "model"], + "properties": { + "defaults": { + "type": "object", + 
"properties": { + "agg_time_dimension": { + "type": "string" + } + }, + "additionalProperties": false + }, + "description": { + "type": "string" + }, + "dimensions": { + "type": "array", + "items": { + "$ref": "#/$defs/dimension" + } + }, + "entities": { + "type": "array", + "items": { + "$ref": "#/$defs/entity" + } + }, + "measures": { + "type": "array", + "items": { + "$ref": "#/$defs/measure" + } + }, + "model": { + "type": "string", + "default": "ref('')" + }, + "name": { + "pattern": "(?!.*__).*^[a-z][a-z0-9_]*[a-z0-9]$", + "type": "string" + }, + "primary_entity": { + "type": "string" + } + }, + "additionalProperties": false + } + }, "snapshots": { "type": "array", "items": { @@ -656,6 +640,21 @@ }, "additionalProperties": false, "$defs": { + "aggregation_type_params": { + "type": "object", + "properties": { + "percentile": { + "type": "number" + }, + "use_approximate_percentile": { + "type": "boolean" + }, + "use_discrete_percentile": { + "type": "boolean" + } + }, + "additionalProperties": false + }, "array_of_strings": { "type": "array", "items": { @@ -742,6 +741,98 @@ } } }, + "dimension": { + "type": "object", + "required": ["name", "type"], + "anyOf": [ + { + "not": { + "$ref": "#/$defs/is-time-dimension" + } + }, + { + "required": ["type_params"] + } + ], + "properties": { + "description": { + "type": "string" + }, + "expr": { + "type": ["string", "boolean"] + }, + "is_partition": { + "type": "boolean" + }, + "name": { + "pattern": "(?!.*__).*^[a-z][a-z0-9_]*[a-z0-9]$", + "type": "string" + }, + "type": { + "enum": ["CATEGORICAL", "TIME", "categorical", "time"] + }, + "type_params": { + "$ref": "#/$defs/dimension_type_params" + } + }, + "additionalProperties": false + }, + "dimension_type_params": { + "type": "object", + "required": ["time_granularity"], + "properties": { + "time_granularity": { + "enum": [ + "DAY", + "WEEK", + "MONTH", + "QUARTER", + "YEAR", + "day", + "week", + "month", + "quarter", + "year" + ] + }, + "validity_params": { + "$ref": 
"#/$defs/validity_params" + } + }, + "additionalProperties": false + }, + "entity": { + "required": ["name", "type"], + "type": "object", + "properties": { + "entity": { + "type": "string" + }, + "expr": { + "type": ["string", "boolean"] + }, + "name": { + "pattern": "(?!.*__).*^[a-z][a-z0-9_]*[a-z0-9]$", + "type": "string" + }, + "role": { + "type": "string" + }, + "type": { + "enum": [ + "PRIMARY", + "UNIQUE", + "FOREIGN", + "NATURAL", + "primary", + "unique", + "foreign", + "natural" + ] + } + }, + "additionalProperties": false + }, "freshness_definition": { "default": { "warn_after": { @@ -799,10 +890,146 @@ } } }, + "is-time-dimension": { + "properties": { + "type": { + "enum": ["TIME", "time"] + } + }, + "required": ["type"] + }, "jinja_string": { "type": "string", "pattern": "\\{\\{.*\\}\\}" }, + "measure": { + "required": ["name", "agg"], + "type": "object", + "properties": { + "agg": { + "enum": [ + "SUM", + "MIN", + "MAX", + "AVERAGE", + "COUNT_DISTINCT", + "SUM_BOOLEAN", + "COUNT", + "PERCENTILE", + "MEDIAN", + "sum", + "min", + "max", + "average", + "count_distinct", + "sum_boolean", + "count", + "percentile", + "median" + ] + }, + "agg_params": { + "$ref": "#/$defs/aggregation_type_params" + }, + "agg_time_dimension": { + "pattern": "(?!.*__).*^[a-z][a-z0-9_]*[a-z0-9]$", + "type": "string" + }, + "create_metric": { + "type": "boolean" + }, + "create_metric_display_name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "expr": { + "type": ["string", "integer", "boolean"] + }, + "name": { + "pattern": "(?!.*__).*^[a-z][a-z0-9_]*[a-z0-9]$", + "type": "string" + }, + "non_additive_dimension": { + "$ref": "#/$defs/non_additive_dimension" + } + }, + "additionalProperties": false + }, + "metric_input_measure": { + "oneOf": [ + { + "type": "string" + }, + { + "additionalProperties": false, + "properties": { + "alias": { + "type": "string" + }, + "filter": { + "type": "string" + }, + "name": { + "type": "string" + } + }, + "type": 
"object" + } + ] + }, + "metric_input_schema": { + "type": "object", + "properties": { + "alias": { + "type": "string" + }, + "filter": { + "type": "string" + }, + "name": { + "type": "string" + }, + "offset_to_grain": { + "type": "string" + }, + "offset_window": { + "type": "string" + } + }, + "additionalProperties": false + }, + "metric_type_params": { + "type": "object", + "properties": { + "denominator": { + "$ref": "#/$defs/metric_input_measure" + }, + "expr": { + "type": ["string", "boolean"] + }, + "grain_to_date": { + "type": "string" + }, + "measure": { + "$ref": "#/$defs/metric_input_measure" + }, + "metrics": { + "items": { + "$ref": "#/$defs/metric_input_schema" + }, + "type": "array" + }, + "numerator": { + "$ref": "#/$defs/metric_input_measure" + }, + "window": { + "type": "string" + } + }, + "additionalProperties": false + }, "model_configs": { "type": "object", "properties": { @@ -867,6 +1094,25 @@ } } }, + "non_additive_dimension": { + "type": "object", + "required": ["name"], + "properties": { + "name": { + "type": "string" + }, + "window_choice": { + "enum": ["MIN", "MAX", "min", "max"] + }, + "window_groupings": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "additionalProperties": false + }, "number_or_jinja_string": { "oneOf": [ { @@ -1066,6 +1312,18 @@ } } ] + }, + "validity_params": { + "type": "object", + "properties": { + "is_end": { + "type": "boolean" + }, + "is_start": { + "type": "boolean" + } + }, + "additionalProperties": false } } } diff --git a/tests/1.6/invalid/dbt_project.yml b/tests/1.6/invalid/dbt_project.yml index dd02827..b1ee0d9 100644 --- a/tests/1.6/invalid/dbt_project.yml +++ b/tests/1.6/invalid/dbt_project.yml @@ -49,6 +49,10 @@ tests: +enabled: never +severity: plaid +semantic_models: + test: + +enabled: false + metrics: test: invalid: true \ No newline at end of file diff --git a/tests/1.6/valid/dbt_yml_files.yml b/tests/1.6/valid/dbt_yml_files.yml index b0c8a29..7bd16d2 100644 --- 
a/tests/1.6/valid/dbt_yml_files.yml +++ b/tests/1.6/valid/dbt_yml_files.yml @@ -79,42 +79,55 @@ groups: - name: analytics owner: name: dave - -metrics: - - name: new_customers - label: New Customers marked 'paying' - model: ref('dim_model_7') - description: "The number of paid customers using the product" - - calculation_method: count - expression: user_id # superfluous here, but shown as an example - - timestamp: signup_date - time_grains: [day, week, month] +semantic_models: + - name: customers + defaults: + agg_time_dimension: first_ordered_at + description: | + Customer grain mart. + model: ref('customers') + entities: + - name: customer + expr: customer_id + type: primary dimensions: - - plan - - country - - filters: - - field: is_paying - operator: '=' - value: 'true' - - field: company_name - operator: '!=' - value: "Acme', Inc" + - name: customer_name + type: categorical + - name: customer_type + type: categorical + - name: first_ordered_at + type: time + type_params: + time_granularity: day + - name: last_ordered_at + type: time + type_params: + time_granularity: day + measures: + - name: count_lifetime_orders + description: Total count of orders per customer. + agg: sum + - name: lifetime_spend_pretax + description: Customer lifetime spend before taxes. + agg: sum + - name: lifetime_spend + agg: sum + description: Gross customer lifetime spend inclusive of taxes. - meta: - team: "Finance" - refresh_rate: "Bob's weekly run" - - - name: derived_metric - label: New Customers marked 'paying' - description: "The number of paid customers using the product" - - calculation_method: derived - expression: "{{ metric('new_customers') }} * 2" - - window: - count: 14 - period: day +metrics: + # Simple metrics + - name: customers_with_orders + description: "Distinct count of customers placing orders" + type: simple + label: Customers w/ Orders + type_params: + measure: customers_with_orders + - name: new_customer + description: Unique count of new customers.
+ label: New Customers + type: simple + type_params: + measure: customers_with_orders + filter: | + {{ Dimension('customer__customer_type') }} = 'new' \ No newline at end of file