Skip to content

Commit

Permalink
add precision option to equality test
Browse files Browse the repository at this point in the history
  • Loading branch information
rlh1994 committed Feb 7, 2023
1 parent 7149065 commit 31405d5
Show file tree
Hide file tree
Showing 9 changed files with 152 additions and 33 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
## Contributors:
--->

# Unreleased
## New features
- The `equality` test now accepts an additional argument, `precision`, to aid in comparing floating point numbers ([#757](https://github.com/dbt-labs/dbt-utils/issues/757), [#765](https://github.com/dbt-labs/dbt-utils/pull/765))
## Contributors:
- [@rlh1994](https://github.com/rlh1994)

# Unreleased
## New features
- Add option to ignore columns in equality test ([#734](https://github.com/dbt-labs/dbt-utils/issues/734), [#737](https://github.com/dbt-labs/dbt-utils/pull/737))
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr

#### equality ([source](macros/generic_tests/equality.sql))

Asserts the equality of two relations. Optionally specify a subset of columns to compare or ignore.
Asserts the equality of two relations. Optionally specify a subset of columns to compare or ignore, and a precision to compare numeric columns on.

**Usage:**

Expand All @@ -135,6 +135,7 @@ models:
compare_columns:
- first_column
- second_column
precision: 4
# compare all columns except the ones on the ignore list
- name: model_name_ignore_columns
Expand Down
11 changes: 11 additions & 0 deletions integration_tests/data/schema_tests/data_test_equality_floats.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
id,float_number
05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186
cfae9054-940b-42a1-84d4-052daae6194f,81.2511656
6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675
c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841
59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434
b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425
26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223
b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680
11c979b7-2661-4375-8143-7c9b54b90627,19.5755431
a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
id,float_number,to_ignore
05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186,a
cfae9054-940b-42a1-84d4-052daae6194f,81.2511656,a
6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675,a
c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841,a
59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434,a
b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425,a
26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223,a
b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680,a
11c979b7-2661-4375-8143-7c9b54b90627,19.5755431,a
a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047,a
10 changes: 9 additions & 1 deletion integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ seeds:
sql:
data_events_20180103:
+schema: events

data_get_column_values_dropped:
# this.incorporate() to hardcode the node's type as otherwise dbt doesn't know it yet
+post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}"
Expand All @@ -71,3 +71,11 @@ seeds:
data_test_sequential_timestamps:
+column_types:
my_timestamp: timestamp

data_test_equality_floats:
+column_types:
float_number: float

data_test_equality_floats_columns:
+column_types:
float_number: float
40 changes: 37 additions & 3 deletions integration_tests/models/generic_tests/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,14 @@ seeds:
upper_bound_column: valid_to
partition_by: subscription_id
zero_length_range_allowed: true

- name: data_unique_combination_of_columns
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- month
- product

- name: data_cardinality_equality_a
columns:
- name: same_name
Expand Down Expand Up @@ -209,7 +209,41 @@ models:
- first_name
- last_name
- email


- name: test_equality_floats
tests:
# test precision only
- dbt_utils.equality:
compare_model: ref('data_test_equality_floats')
precision: 4
- dbt_utils.equality:
compare_model: ref('data_test_equality_floats')
precision: 8
error_if: "<1" #sneaky way to ensure that the test is returning failing rows
warn_if: "<0"

- name: test_equality_floats_columns
tests:
# Positive assertion tests
- dbt_utils.equality:
compare_model: ref('data_test_equality_floats_columns')
compare_columns:
- id
- float_number
precision: 4
- dbt_utils.equality:
compare_model: ref('data_test_equality_floats_columns')
ignore_columns:
- to_ignore
precision: 4
# all columns should fail even with rounding
- dbt_utils.equality:
compare_model: ref('data_test_equality_floats_columns')
precision: 4
error_if: "<1" #sneaky way to ensure that the test is returning failing rows
warn_if: "<0"


- name: test_fewer_rows_than
tests:
- dbt_utils.fewer_rows_than:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Re-selects the float seed with 0.0000001 added to every float_number value.
with seed_rows as (

    select * from {{ ref('data_test_equality_floats') }}

)

select
    id,
    float_number + 0.0000001 as float_number
from seed_rows
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Re-selects the float seed with 0.0000001 added to every float_number value,
-- plus a constant 'b' in an extra to_ignore column.
with seed_rows as (

    select * from {{ ref('data_test_equality_floats') }}

)

select
    id,
    float_number + 0.0000001 as float_number,
    'b' as to_ignore
from seed_rows
86 changes: 58 additions & 28 deletions macros/generic_tests/equality.sql
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{% test equality(model, compare_model, compare_columns=None, ignore_columns=None) %}
{{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, ignore_columns)) }}
{% test equality(model, compare_model, compare_columns=None, ignore_columns=None, precision = None) %}
{{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, ignore_columns, precision)) }}
{% endtest %}

{% macro default__test_equality(model, compare_model, compare_columns=None, ignore_columns=None) %}
{% macro default__test_equality(model, compare_model, compare_columns=None, ignore_columns=None, precision = None) %}

{%- if compare_columns and ignore_columns -%}
{{ exceptions.raise_compiler_error("Both a compare and an ignore list were provided to the `equality` macro. Only one is allowed") }}
{%- endif -%}

{% set set_diff %}
{% set set_diff %}
count(*) + coalesce(abs(
sum(case when which_diff = 'a_minus_b' then 1 else 0 end) -
sum(case when which_diff = 'b_minus_a' then 1 else 0 end)
Expand All @@ -26,32 +26,62 @@
-- setup
{%- do dbt_utils._is_relation(model, 'test_equality') -%}

{#-
If the compare_cols arg is provided, we can run this test without querying the
information schema — this allows the model to be an ephemeral model
-#}

{%- if not compare_columns -%}
{%- if not precision -%}
{#-
If the compare_cols arg is provided, we can run this test without querying the
information schema — this allows the model to be an ephemeral model
-#}
{%- if not compare_columns -%}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- set compare_columns = adapter.get_columns_in_relation(model)-%}


{%- if ignore_columns -%}
{#-- Lower case ignore columns for easier comparison --#}
{%- set ignore_columns = ignore_columns | map("lower") | list %}

{# Filter out the excluded columns #}
{%- set include_columns = [] %}
{%- for column in compare_columns -%}
{%- if column.name | lower not in ignore_columns -%}
{% do include_columns.append(column) %}
{%- endif %}
{%- endfor %}

{%- set compare_columns = include_columns | map(attribute='quoted') %}
{%- else -%}
{%- set compare_columns = compare_columns | map(attribute='quoted') %}
{%- endif -%}
{%- endif -%}

{% set compare_cols_csv = compare_columns | join(', ') %}

{% else %}
{#-
If rounding is required, we need to get the types, so it can't be ephemeral
-#}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='name') -%}
{%- endif -%}

{%- if ignore_columns -%}
{#-- Lower case ignore columns for easier comparison --#}
{%- set ignore_columns = ignore_columns | map("lower") | list %}

{%- set include_columns = [] %}
{%- for column in compare_columns -%}
{%- if column | lower not in ignore_columns -%}
{% do include_columns.append(column) %}
{%- endif %}
{%- endfor %}
{%- set columns = adapter.get_columns_in_relation(model) -%}
{% set columns_list = [] %}
{%- for col in columns -%}
{%- if (
(col.name|lower in compare_columns|map('lower') or not compare_columns) and
(col.name|lower not in ignore_columns|map('lower') or not ignore_columns)
) -%}
{# Databricks double type is not picked up by any number type checks in dbt #}
{%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%}
{# Cast is required due to postgres not having round for a double precision number #}
{%- do columns_list.append('round(cast(' ~ col.quoted ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ col.quoted) -%}
{%- else -%}
{%- do columns_list.append(col.quoted) -%}
{%- endif -%}
{% endif %}
{%- endfor -%}
{% set compare_cols_csv = columns_list | join(', ') %}
{%- set compare_columns = include_columns %}

{%- endif -%}

{% set compare_cols_csv = compare_columns | join(', ') %}
{% endif %}
with a as (
Expand Down

0 comments on commit 31405d5

Please sign in to comment.