Skip to content

Commit

Permalink
Merge pull request #163 from kokorin/feature/support-option-to-proddu…
Browse files Browse the repository at this point in the history
…ce-column-type-with-precision

Support char_length and numeric_precision options
  • Loading branch information
z3z1ma authored Jun 17, 2024
2 parents 6b17010 + eb8723a commit 9c179dd
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 5 deletions.
Binary file modified demo_duckdb/jaffle_shop.duckdb
Binary file not shown.
8 changes: 5 additions & 3 deletions demo_duckdb/models/customers.sql
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ customer_payments as (

select
orders.customer_id,
sum(amount) as total_amount
sum(amount) as total_amount,
avg(amount) as average_amount

from payments

Expand All @@ -50,11 +51,12 @@ final as (
select
customers.customer_id,
customers.first_name,
customers.last_name,
cast(customers.last_name as varchar(256)) as last_name,
customer_orders.first_order,
customer_orders.most_recent_order,
customer_orders.number_of_orders,
customer_payments.total_amount as customer_lifetime_value
customer_payments.total_amount as customer_lifetime_value,
cast(customer_payments.average_amount as decimal) as customer_average_value

from customers

Expand Down
19 changes: 17 additions & 2 deletions src/dbt_osmosis/core/osmosis.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)

import ruamel.yaml
from dbt.adapters.base.column import Column
from dbt.contracts.results import CatalogArtifact, CatalogKey, CatalogTable, ColumnMetadata

from dbt_osmosis.core.column_level_knowledge_propagator import ColumnLevelKnowledgePropagator
Expand Down Expand Up @@ -119,6 +120,8 @@ def __init__(
skip_add_columns: bool = False,
skip_add_tags: bool = False,
skip_add_data_types: bool = False,
numeric_precision: bool = False,
char_length: bool = False,
skip_merge_meta: bool = False,
add_progenitor_to_meta: bool = False,
vars: Optional[str] = None,
Expand All @@ -136,6 +139,8 @@ def __init__(
self.skip_add_columns = skip_add_columns
self.skip_add_tags = skip_add_tags
self.skip_add_data_types = skip_add_data_types
self.numeric_precision = numeric_precision
self.char_length = char_length
self.skip_merge_meta = skip_merge_meta
self.add_progenitor_to_meta = add_progenitor_to_meta
self.use_unrendered_descriptions = use_unrendered_descriptions
Expand Down Expand Up @@ -366,6 +371,16 @@ def catalog(self) -> Optional[CatalogArtifact]:
self._catalog = CatalogArtifact.from_dict(json.loads(file_path.read_text()))
return self._catalog

def _get_column_type(self, column: Column) -> str:
    """Pick the type string to record for ``column``.

    Returns ``column.data_type`` when the column is numeric and
    ``self.numeric_precision`` is set, or when it is a string column and
    ``self.char_length`` is set. In every other case returns
    ``column.dtype``.
    """
    # Numeric column with the numeric-precision option enabled.
    if column.is_numeric() and self.numeric_precision:
        return column.data_type
    # String column with the char-length option enabled.
    if column.is_string() and self.char_length:
        return column.data_type
    # No applicable option: fall back to the plain dtype.
    return column.dtype

@lru_cache(maxsize=5000)
def get_columns_meta(self, catalog_key: CatalogKey) -> Dict[str, ColumnMetadata]:
"""Get all columns in a list for a model"""
Expand Down Expand Up @@ -411,7 +426,7 @@ def get_columns_meta(self, catalog_key: CatalogKey) -> Dict[str, ColumnMetadata]
continue
columns[self.column_casing(c.name)] = ColumnMetadata(
name=self.column_casing(c.name),
type=c.data_type,
type=self._get_column_type(c),
index=None, # type: ignore
comment=getattr(c, "comment", None),
)
Expand All @@ -421,7 +436,7 @@ def get_columns_meta(self, catalog_key: CatalogKey) -> Dict[str, ColumnMetadata]
continue
columns[self.column_casing(exp.name)] = ColumnMetadata(
name=self.column_casing(exp.name),
type=exp.data_type,
type=self._get_column_type(exp),
index=None, # type: ignore
comment=getattr(exp, "comment", None),
)
Expand Down
42 changes: 42 additions & 0 deletions src/dbt_osmosis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,16 @@ def wrapper(*args, **kwargs):
is_flag=True,
help="If specified, we will skip adding data types to the models.",
)
@click.option(
"--numeric-precision",
is_flag=True,
help="If specified, numeric types will have precision and scale, e.g. Number(38, 8).",
)
@click.option(
"--char-length",
is_flag=True,
help="If specified, character types will have length, e.g. Varchar(128).",
)
@click.option(
"--skip-merge-meta",
is_flag=True,
Expand Down Expand Up @@ -196,6 +206,8 @@ def refactor(
skip_add_columns: bool = False,
skip_add_tags: bool = False,
skip_add_data_types: bool = False,
numeric_precision: bool = False,
char_length: bool = False,
skip_merge_meta: bool = False,
add_progenitor_to_meta: bool = False,
models: Optional[List[str]] = None,
Expand Down Expand Up @@ -229,6 +241,8 @@ def refactor(
skip_add_columns=skip_add_columns,
skip_add_tags=skip_add_tags,
skip_add_data_types=skip_add_data_types,
numeric_precision=numeric_precision,
char_length=char_length,
skip_merge_meta=skip_merge_meta,
add_progenitor_to_meta=add_progenitor_to_meta,
profile=profile,
Expand Down Expand Up @@ -287,6 +301,16 @@ def refactor(
is_flag=True,
help="If specified, we will skip adding data types to the models.",
)
@click.option(
"--numeric-precision",
is_flag=True,
help="If specified, numeric types will have precision and scale, e.g. Number(38, 8).",
)
@click.option(
"--char-length",
is_flag=True,
help="If specified, character types will have length, e.g. Varchar(128).",
)
@click.option(
"--skip-merge-meta",
is_flag=True,
Expand Down Expand Up @@ -333,6 +357,8 @@ def organize(
skip_add_columns: bool = False,
skip_add_tags: bool = False,
skip_add_data_types: bool = False,
numeric_precision: bool = False,
char_length: bool = False,
skip_merge_meta: bool = False,
add_progenitor_to_meta: bool = False,
profile: Optional[str] = None,
Expand Down Expand Up @@ -362,6 +388,8 @@ def organize(
skip_add_columns=skip_add_columns,
skip_add_tags=skip_add_tags,
skip_add_data_types=skip_add_data_types,
numeric_precision=numeric_precision,
char_length=char_length,
skip_merge_meta=skip_merge_meta,
add_progenitor_to_meta=add_progenitor_to_meta,
profile=profile,
Expand Down Expand Up @@ -434,6 +462,16 @@ def organize(
is_flag=True,
help="If specified, we will skip adding data types to the models.",
)
@click.option(
"--numeric-precision",
is_flag=True,
help="If specified, numeric types will have precision and scale, e.g. Number(38, 8).",
)
@click.option(
"--char-length",
is_flag=True,
help="If specified, character types will have length, e.g. Varchar(128).",
)
@click.option(
"--skip-merge-meta",
is_flag=True,
Expand Down Expand Up @@ -490,6 +528,8 @@ def document(
skip_add_columns: bool = False,
skip_add_tags: bool = False,
skip_add_data_types: bool = False,
numeric_precision: bool = False,
char_length: bool = False,
skip_merge_meta: bool = False,
add_progenitor_to_meta: bool = False,
profile: Optional[str] = None,
Expand Down Expand Up @@ -521,6 +561,8 @@ def document(
skip_add_columns=skip_add_columns,
skip_add_tags=skip_add_tags,
skip_add_data_types=skip_add_data_types,
numeric_precision=numeric_precision,
char_length=char_length,
skip_merge_meta=skip_merge_meta,
add_progenitor_to_meta=add_progenitor_to_meta,
profile=profile,
Expand Down
36 changes: 36 additions & 0 deletions tests/test_yaml_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,25 @@ def _customer_column_types(yaml_manager: DbtYamlManager) -> dict[str, str]:

def test_get_columns_meta(yaml_manager: DbtYamlManager):
    """Compare the types from ``_customer_column_types`` for the fixture manager with the expected mapping."""
    actual_types = _customer_column_types(yaml_manager)
    expected_types = {
        # In DuckDB, decimals always have precision and scale.
        "customer_average_value": "DECIMAL(18,3)",
        "customer_id": "INTEGER",
        "customer_lifetime_value": "DOUBLE",
        "first_name": "VARCHAR",
        "first_order": "DATE",
        "last_name": "VARCHAR",
        "most_recent_order": "DATE",
        "number_of_orders": "BIGINT",
    }
    assert actual_types == expected_types


def test_get_columns_meta_char_length():
yaml_manager = DbtYamlManager(
project_dir="demo_duckdb", profiles_dir="demo_duckdb", char_length=True, dry_run=True
)
assert _customer_column_types(yaml_manager) == {
# in DuckDB decimals always have precision and scale
"customer_average_value": "DECIMAL(18,3)",
"customer_id": "INTEGER",
"customer_lifetime_value": "DOUBLE",
"first_name": "character varying(256)",
Expand All @@ -78,3 +97,20 @@ def test_get_columns_meta(yaml_manager: DbtYamlManager):
"most_recent_order": "DATE",
"number_of_orders": "BIGINT",
}


def test_get_columns_meta_numeric_precision():
    """Compare the types from ``_customer_column_types`` with ``numeric_precision=True`` against the expected mapping."""
    manager = DbtYamlManager(
        project_dir="demo_duckdb",
        profiles_dir="demo_duckdb",
        numeric_precision=True,
        dry_run=True,
    )
    actual_types = _customer_column_types(manager)
    expected_types = {
        # In DuckDB, decimals always have precision and scale.
        "customer_average_value": "DECIMAL(18,3)",
        "customer_id": "INTEGER",
        "customer_lifetime_value": "DOUBLE",
        "first_name": "VARCHAR",
        "first_order": "DATE",
        "last_name": "VARCHAR",
        "most_recent_order": "DATE",
        "number_of_orders": "BIGINT",
    }
    assert actual_types == expected_types

0 comments on commit 9c179dd

Please sign in to comment.