From 086a175a14b5d3a7b6cc2b7a04d2036069e854f2 Mon Sep 17 00:00:00 2001 From: Brice Loustau Date: Wed, 30 Aug 2023 09:24:42 -0700 Subject: [PATCH 1/6] Updated macro and added integration test --- integration_tests/models/generic_tests/schema.yml | 8 ++++++++ .../generic_tests/test_equal_column_exclude.sql | 13 +++++++++++++ macros/generic_tests/equality.sql | 12 ++++++++---- 3 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 integration_tests/models/generic_tests/test_equal_column_exclude.sql diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml index fa0e7441..c1fecc6a 100644 --- a/integration_tests/models/generic_tests/schema.yml +++ b/integration_tests/models/generic_tests/schema.yml @@ -192,6 +192,14 @@ models: - last_name - email + - name: test_equal_column_exclude + tests: + - dbt_utils.equality: + compare_model: ref('data_people') + exclude_columns: + - first_name + - last_name + - name: test_fewer_rows_than tests: - dbt_utils.fewer_rows_than: diff --git a/integration_tests/models/generic_tests/test_equal_column_exclude.sql b/integration_tests/models/generic_tests/test_equal_column_exclude.sql new file mode 100644 index 00000000..cf176315 --- /dev/null +++ b/integration_tests/models/generic_tests/test_equal_column_exclude.sql @@ -0,0 +1,13 @@ +{{ config(materialized='table') }} + +select + + id, + 'incorrect_name' as first_name, + last_name, + email, + ip_address, + created_at, + is_active + +from {{ ref('data_people') }} diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index ffc6a2b8..1bfedbad 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -1,8 +1,8 @@ -{% test equality(model, compare_model, compare_columns=None) %} - {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns)) }} +{% test equality(model, compare_model, compare_columns=None, exclude_columns=None) %} + {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, exclude_columns)) }} {% endtest %} -{% macro default__test_equality(model, compare_model, compare_columns=None) %} +{% macro default__test_equality(model, compare_model, compare_columns=None, exclude_columns=None) %} {% set set_diff %} count(*) + coalesce(abs( @@ -29,7 +29,11 @@ information schema — this allows the model to be an ephemeral model {%- if not compare_columns -%} {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%} + {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='name') | list -%} +{%- endif -%} + +{%- if exclude_columns -%} + {%- set compare_columns = compare_columns | reject('in', exclude_columns) | list -%} {%- endif -%} {% set compare_cols_csv = compare_columns | join(', ') %} From 3bb457e456c963677bd7234e8e74213d23215366 Mon Sep 17 00:00:00 2001 From: Brice Loustau Date: Wed, 30 Aug 2023 09:30:29 -0700 Subject: [PATCH 2/6] Updated README --- README.md | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 827626ba..5ef64071 100644 --- a/README.md +++ b/README.md @@ -114,15 +114,20 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr ### equality ([source](macros/generic_tests/equality.sql)) -Asserts the equality of two relations. Optionally specify a subset of columns to compare. +Asserts the equality of two relations. Optionally specify a subset of columns to compare or to exclude. **Usage:** ```yaml -version: 2 - models: - name: model_name + tests: + - dbt_utils.equality: + compare_model: ref('other_table_name') +``` + +With `compare_columns`: +```yaml tests: - dbt_utils.equality: compare_model: ref('other_table_name') @@ -131,6 +136,17 @@ models: - second_column ``` +With `exclude_columns`: +```yaml + tests: + - dbt_utils.equality: + compare_model: ref('other_table_name') + exclude_columns: + - third_column + - fourth_column +``` + + ### expression_is_true ([source](macros/generic_tests/expression_is_true.sql)) Asserts that a valid SQL expression is true for all records. This is useful when checking integrity across columns. From 862bdbdb03c51d4f1830cdc471fbe52262468322 Mon Sep 17 00:00:00 2001 From: Brice Loustau Date: Wed, 30 Aug 2023 09:44:50 -0700 Subject: [PATCH 3/6] Added changelog entry --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7799278..abafceb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ ---> # Unreleased + +## Enhancements +- Added an optional `exclude_columns` to the `equality` test. ([#828](https://github.com/dbt-labs/dbt-utils/issues/828)) + ## Fixes - deduplicate macro for Databricks now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic - deduplicate macro for Redshift now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic From bc2f00287abe20765f7fc033048dce5581c3b418 Mon Sep 17 00:00:00 2001 From: Brice Loustau Date: Wed, 30 Aug 2023 13:53:23 -0700 Subject: [PATCH 4/6] Fixed for Snowflake --- README.md | 2 ++ macros/generic_tests/equality.sql | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5ef64071..319e88f0 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,8 @@ With `exclude_columns`: - fourth_column ``` +**Note:** The exclude columns are case-insensitive. + ### expression_is_true ([source](macros/generic_tests/expression_is_true.sql)) diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index 1bfedbad..0c5eaf35 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -33,10 +33,16 @@ information schema — this allows the model to be an ephemeral model {%- endif -%} {%- if exclude_columns -%} - {%- set compare_columns = compare_columns | reject('in', exclude_columns) | list -%} + {%- set final_columns = [] -%} + {%- for column_name in compare_columns -%} + {%- if column_name | lower not in exclude_columns | map('lower') -%} + {%- do final_columns.append(column_name) -%} + {%- endif -%} + {%- endfor -%} + {%- set compare_columns = final_columns -%} {%- endif -%} -{% set compare_cols_csv = compare_columns | join(', ') %} +{% set compare_cols_csv = get_quoted_csv(compare_columns) %} with a as ( From 149427f7a314117bc09d1d6996d4c02938025491 Mon Sep 17 00:00:00 2001 From: Brice Loustau Date: Wed, 30 Aug 2023 15:25:05 -0700 Subject: [PATCH 5/6] Fixed quoted --- macros/generic_tests/equality.sql | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index 0c5eaf35..132f5fbf 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -29,20 +29,25 @@ information schema — this allows the model to be an ephemeral model {%- if not compare_columns -%} {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='name') | list -%} + {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') | list -%} {%- endif -%} {%- if exclude_columns -%} - {%- set final_columns = [] -%} + {%- set exclude_columns_lower = exclude_columns | map('lower') | list -%} + {%- set compare_columns_final = [] -%} {%- for column_name in compare_columns -%} - {%- if column_name | lower not in exclude_columns | map('lower') -%} - {%- do final_columns.append(column_name) -%} + {%- set column_name_bare = column_name | lower -%} + {%- if column_name_bare.startswith('"') and column_name_bare.endswith('"') -%} + {%- set column_name_bare = column_name_bare[1:-1] -%} + {%- endif -%} + {%- if column_name_bare not in exclude_columns_lower -%} + {%- do compare_columns_final.append(column_name) -%} {%- endif -%} {%- endfor -%} - {%- set compare_columns = final_columns -%} + {%- set compare_columns = compare_columns_final -%} {%- endif -%} -{% set compare_cols_csv = get_quoted_csv(compare_columns) %} +{% set compare_cols_csv = compare_columns | join(', ') %} with a as ( From e5be632fc248d2154db59f441c585710632f0d42 Mon Sep 17 00:00:00 2001 From: Brice Loustau Date: Thu, 31 Aug 2023 09:49:51 -0700 Subject: [PATCH 6/6] Fix quoting --- README.md | 15 ++++++++++++--- macros/generic_tests/equality.sql | 20 ++++++++++++-------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 319e88f0..bd3b28e7 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ models: compare_model: ref('other_table_name') ``` -With `compare_columns`: +**With `compare_columns` (optional):** ```yaml tests: - dbt_utils.equality: @@ -136,7 +136,16 @@ With `compare_columns`: - second_column ``` -With `exclude_columns`: +*Note:* The compare columns are case-insensitive (input uppercase or lowercase and it will work!). +If your adapter is Snowflake and the columns of your model are quoted (bad idea!), you should quote the compare columns like so: +```yaml + compare_columns: + - '"first_column"' + - '"second_column"' +``` + + +**With `exclude_columns` (optional):** ```yaml tests: - dbt_utils.equality: @@ -146,7 +155,7 @@ With `exclude_columns`: - fourth_column ``` -**Note:** The exclude columns are case-insensitive. +*Note:* The exclude columns are case-insensitive (input uppercase or lowercase and it will work!). ### expression_is_true ([source](macros/generic_tests/expression_is_true.sql)) diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql index 132f5fbf..2eeba5e5 100644 --- a/macros/generic_tests/equality.sql +++ b/macros/generic_tests/equality.sql @@ -27,27 +27,31 @@ If the compare_cols arg is provided, we can run this test without querying the information schema — this allows the model to be an ephemeral model -#} -{%- if not compare_columns -%} +{%- if compare_columns -%} + {%- set should_quote = False -%} +{%- else -%} {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%} - {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') | list -%} + {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='name') | list -%} + {%- set should_quote = True -%} {%- endif -%} {%- if exclude_columns -%} {%- set exclude_columns_lower = exclude_columns | map('lower') | list -%} {%- set compare_columns_final = [] -%} {%- for column_name in compare_columns -%} - {%- set column_name_bare = column_name | lower -%} - {%- if column_name_bare.startswith('"') and column_name_bare.endswith('"') -%} - {%- set column_name_bare = column_name_bare[1:-1] -%} - {%- endif -%} - {%- if column_name_bare not in exclude_columns_lower -%} + {%- if column_name | lower not in exclude_columns_lower -%} {%- do compare_columns_final.append(column_name) -%} {%- endif -%} {%- endfor -%} {%- set compare_columns = compare_columns_final -%} {%- endif -%} -{% set compare_cols_csv = compare_columns | join(', ') %} +{% if should_quote %} + {% set compare_cols_csv = get_quoted_csv(compare_columns) %} +{% else %} + {% set compare_cols_csv = compare_columns | join(', ') %} +{% endif %} + with a as (