diff --git a/CHANGELOG.md b/CHANGELOG.md index b7799278..abafceb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ ---> # Unreleased + +## Enhancements +- Added an optional `exclude_columns` argument to the `equality` test. ([#828](https://github.com/dbt-labs/dbt-utils/issues/828)) + ## Fixes - deduplicate macro for Databricks now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic - deduplicate macro for Redshift now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic diff --git a/README.md b/README.md index 827626ba..bd3b28e7 100644 --- a/README.md +++ b/README.md @@ -114,15 +114,20 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr ### equality ([source](macros/generic_tests/equality.sql)) -Asserts the equality of two relations. Optionally specify a subset of columns to compare. +Asserts the equality of two relations. Optionally specify a subset of columns to compare or to exclude. **Usage:** ```yaml -version: 2 - models: - name: model_name + tests: + - dbt_utils.equality: + compare_model: ref('other_table_name') +``` + +**With `compare_columns` (optional):** +```yaml tests: - dbt_utils.equality: compare_model: ref('other_table_name') @@ -131,6 +136,28 @@ models: - second_column ``` +*Note:* The compare columns are case-insensitive (uppercase and lowercase input both work). +If your adapter is Snowflake and the columns of your model are quoted (bad idea!), you should quote the compare columns like so: +```yaml + compare_columns: + - '"first_column"' + - '"second_column"' +``` + + +**With `exclude_columns` (optional):** +```yaml + tests: + - dbt_utils.equality: + compare_model: ref('other_table_name') + exclude_columns: + - third_column + - fourth_column +``` + +*Note:* The exclude columns are case-insensitive (uppercase and lowercase input both work). 
+ + ### expression_is_true ([source](macros/generic_tests/expression_is_true.sql)) Asserts that a valid SQL expression is true for all records. This is useful when checking integrity across columns. diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml index fa0e7441..c1fecc6a 100644 --- a/integration_tests/models/generic_tests/schema.yml +++ b/integration_tests/models/generic_tests/schema.yml @@ -192,6 +192,14 @@ models: - last_name - email + - name: test_equal_column_exclude + tests: + - dbt_utils.equality: + compare_model: ref('data_people') + exclude_columns: + - first_name + - last_name + - name: test_fewer_rows_than tests: - dbt_utils.fewer_rows_than: diff --git a/integration_tests/models/generic_tests/test_equal_column_exclude.sql b/integration_tests/models/generic_tests/test_equal_column_exclude.sql new file mode 100644 index 00000000..cf176315 --- /dev/null +++ b/integration_tests/models/generic_tests/test_equal_column_exclude.sql @@ -0,0 +1,13 @@ +{{ config(materialized='table') }} + +select + + id, + 'incorrect_name' as first_name, + last_name, + email, + ip_address, + created_at, + is_active + +from {{ ref('data_people') }} diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index ffc6a2b8..2eeba5e5 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -1,8 +1,8 @@ -{% test equality(model, compare_model, compare_columns=None) %} - {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns)) }} +{% test equality(model, compare_model, compare_columns=None, exclude_columns=None) %} + {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, exclude_columns)) }} {% endtest %} -{% macro default__test_equality(model, compare_model, compare_columns=None) %} +{% macro default__test_equality(model, compare_model, compare_columns=None, exclude_columns=None) %} {% 
set set_diff %} count(*) + coalesce(abs( @@ -27,12 +27,31 @@ If the compare_cols arg is provided, we can run this test without querying the information schema — this allows the model to be an ephemeral model -#} -{%- if not compare_columns -%} +{%- if compare_columns -%} + {%- set should_quote = False -%} +{%- else -%} {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%} + {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='name') | list -%} + {%- set should_quote = True -%} {%- endif -%} -{% set compare_cols_csv = compare_columns | join(', ') %} +{%- if exclude_columns -%} + {%- set exclude_columns_lower = exclude_columns | map('lower') | list -%} + {%- set compare_columns_final = [] -%} + {%- for column_name in compare_columns -%} + {%- if column_name | lower not in exclude_columns_lower -%} + {%- do compare_columns_final.append(column_name) -%} + {%- endif -%} + {%- endfor -%} + {%- set compare_columns = compare_columns_final -%} +{%- endif -%} + +{% if should_quote %} + {% set compare_cols_csv = get_quoted_csv(compare_columns) %} +{% else %} + {% set compare_cols_csv = compare_columns | join(', ') %} +{% endif %} + with a as (