From fe0679bd4c3fcf446126351cb720ee5daae4ac98 Mon Sep 17 00:00:00 2001 From: Yasuhisa Yoshida Date: Thu, 5 Oct 2023 22:18:35 +0900 Subject: [PATCH 1/8] Add more types to improve code readability (#111) * Add type for catalog with CatalogArtifact * Replace Tuple[str, str, str] with CatalogKey * Fix import ordering * Add types for _run --- src/dbt_osmosis/core/osmosis.py | 56 ++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py index d4e82a7..389d4b5 100644 --- a/src/dbt_osmosis/core/osmosis.py +++ b/src/dbt_osmosis/core/osmosis.py @@ -10,7 +10,7 @@ from typing import Any, Dict, Iterable, Iterator, List, MutableMapping, Optional, Set, Tuple import ruamel.yaml -from dbt.contracts.results import ColumnMetadata +from dbt.contracts.results import CatalogArtifact, CatalogKey, CatalogTable, ColumnMetadata from pydantic import BaseModel from dbt_osmosis.core.column_level_knowledge_propagator import ColumnLevelKnowledgePropagator @@ -111,7 +111,7 @@ def __init__( self.models = models or [] self.dry_run = dry_run self.catalog_file = catalog_file - self._catalog = None + self._catalog: Optional[CatalogArtifact] = None self.skip_add_columns = skip_add_columns self.skip_add_tags = skip_add_tags self.skip_merge_meta = skip_merge_meta @@ -285,13 +285,13 @@ def get_target_schema_path(self, node: ManifestNode) -> Path: return as_path(self.config.project_root).joinpath(*parts) @staticmethod - def get_database_parts(node: ManifestNode) -> Tuple[str, str, str]: - """Returns a tuple of database, schema, and alias for a given node.""" - return node.database, node.schema, getattr(node, "alias", node.name) + def get_catalog_key(node: ManifestNode) -> CatalogKey: + """Returns CatalogKey for a given node.""" + return CatalogKey(node.database, node.schema, getattr(node, "alias", node.name)) def get_base_model(self, node: ManifestNode) -> Dict[str, Any]: """Construct a base model object with model name, column names populated from database""" - columns = self.get_columns(self.get_database_parts(node)) + columns = self.get_columns(self.get_catalog_key(node)) return { "name": node.name, "columns": [{"name": column_name, "description": ""} for column_name in columns], @@ -302,7 +302,7 @@ def augment_existing_model( ) -> Dict[str, Any]: """Injects columns from database into existing model if not found""" model_columns: List[str] = [c["name"] for c in documentation.get("columns", [])] - database_columns = self.get_columns(self.get_database_parts(node)) + database_columns = self.get_columns(self.get_catalog_key(node)) for column in ( c for c in database_columns if not any(c.lower() == m.lower() for m in model_columns) ): @@ -319,13 +319,13 @@ def augment_existing_model( ) return documentation - def get_columns(self, parts: Tuple[str, str, str]) -> List[str]: + def get_columns(self, catalog_key: CatalogKey) -> List[str]: """Get all columns in a list for a model""" - return list(self.get_columns_meta(parts).keys()) + return list(self.get_columns_meta(catalog_key).keys()) @property - def catalog(self) -> Optional[dict]: + def catalog(self) -> Optional[CatalogArtifact]: """Get the catalog data from the catalog file Catalog data is cached in memory to avoid reading and parsing the file multiple times @@ -337,27 +337,27 @@ def catalog(self) -> Optional[dict]: file_path = Path(self.catalog_file) if not file_path.exists(): return None - self._catalog = json.loads(file_path.read_text()) + self._catalog = 
CatalogArtifact.from_dict(json.loads(file_path.read_text())) return self._catalog @lru_cache(maxsize=5000) - def get_columns_meta(self, parts: Tuple[str, str, str]) -> Dict[str, ColumnMetadata]: + def get_columns_meta(self, catalog_key: CatalogKey) -> Dict[str, ColumnMetadata]: """Get all columns in a list for a model""" columns = OrderedDict() blacklist = self.config.vars.vars.get("dbt-osmosis", {}).get("_blacklist", []) # If we provide a catalog, we read from it if self.catalog: - matching_models = [ + matching_models: List[CatalogTable] = [ model_values - for model, model_values in self.catalog["nodes"].items() - if model.split(".")[-1] == parts[-1] + for model, model_values in self.catalog.nodes.items() + if model.split(".")[-1] == catalog_key.name ] if matching_models: - for col in matching_models[0]["columns"].values(): - if any(re.match(pattern, col["name"]) for pattern in blacklist): + for col in matching_models[0].columns.values(): + if any(re.match(pattern, col.name) for pattern in blacklist): continue - columns[self.column_casing(col["name"])] = ColumnMetadata( - name=self.column_casing(col["name"]), type=col["type"], index=col["index"] + columns[self.column_casing(col.name)] = ColumnMetadata( + name=self.column_casing(col.name), type=col.type, index=col.index ) else: return columns @@ -365,13 +365,13 @@ def get_columns_meta(self, parts: Tuple[str, str, str]) -> Dict[str, ColumnMetad # If we don't provide a catalog we query the warehouse to get the columns else: with self.adapter.connection_named("dbt-osmosis"): - table = self.adapter.get_relation(*parts) + table = self.adapter.get_relation(*catalog_key) if not table: logger().info( ":cross_mark: Relation %s.%s.%s does not exist in target database," " cannot resolve columns", - *parts, + *catalog_key, ) return columns try: @@ -392,7 +392,7 @@ def get_columns_meta(self, parts: Tuple[str, str, str]) -> Dict[str, ColumnMetad logger().info( ":cross_mark: Could not resolve relation %s.%s.%s against database" " active tables during introspective query: %s", - *parts, + *catalog_key, str(error), ) return columns @@ -782,7 +782,13 @@ def get_column_sets( ] return missing_columns, undocumented_columns, extra_columns - def _run(self, unique_id, node, schema_map, force_inheritance=False): + def _run( + self, + unique_id: str, + node: ManifestNode, + schema_map: Dict[str, SchemaFileLocation], + force_inheritance: bool = False, + ): try: with self.mutex: logger().info(":point_right: Processing model: [bold]%s[/bold]", unique_id) @@ -797,8 +803,8 @@ def _run(self, unique_id, node, schema_map, force_inheritance=False): # Build Sets logger().info(":mag: Resolving columns in database") - database_columns_ordered = self.get_columns(self.get_database_parts(node)) - columns_db_meta = self.get_columns_meta(self.get_database_parts(node)) + database_columns_ordered = self.get_columns(self.get_catalog_key(node)) + columns_db_meta = self.get_columns_meta(self.get_catalog_key(node)) database_columns: Set[str] = set(database_columns_ordered) yaml_columns_ordered = [column for column in node.columns] yaml_columns: Set[str] = set(yaml_columns_ordered) From be30a859e76acf54f3be7f706627e30f4225ad8d Mon Sep 17 00:00:00 2001 From: Yasuhisa Yoshida Date: Tue, 10 Oct 2023 05:22:05 +0900 Subject: [PATCH 2/8] Keep column description if meta.osmosis_keep_description is True (#112) * Keep column description if meta.osmosis_keep_description is True * add tests for osmosis_keep_description * apply isort * Deleted due to unnecessary lines --- 
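Note below the fold: a minimal sketch of what the new merge helper does, using
illustrative inputs (only the import path, the helper's name, and the
osmosis_keep_description key come from this patch; calling the underscore-prefixed
helper directly is purely for demonstration). With osmosis_keep_description set in
the column's own meta, the hand-written description is kept while inherited tags
and meta are still merged in.

    from dbt_osmosis.core.column_level_knowledge_propagator import (
        ColumnLevelKnowledgePropagator,
    )

    # "prior" is knowledge inherited from upstream models; "original" is what the
    # column already declares locally in its schema file.
    prior = {
        "description": "Upstream description",
        "tags": ["pii"],
        "meta": {"owner": "analytics"},
    }
    original = {
        "description": "Hand-written description",
        "tags": ["finance"],
        "meta": {"osmosis_keep_description": True},
    }
    ColumnLevelKnowledgePropagator._merge_prior_knowledge_with_original_knowledge(
        prior, original, add_progenitor_to_meta=False, progenitor=None
    )
    # The local description wins; tags and meta are unions of both sides.
    assert prior["description"] == "Hand-written description"
    assert set(prior["tags"]) == {"pii", "finance"}
    assert prior["meta"] == {"osmosis_keep_description": True, "owner": "analytics"}
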
.../core/column_level_knowledge.py | 5 + .../core/column_level_knowledge_propagator.py | 60 ++++++- .../test_column_level_knowledge_propagator.py | 167 +++++++++++++++++- 3 files changed, 223 insertions(+), 9 deletions(-) diff --git a/src/dbt_osmosis/core/column_level_knowledge.py b/src/dbt_osmosis/core/column_level_knowledge.py index 3278fe1..9c11e50 100644 --- a/src/dbt_osmosis/core/column_level_knowledge.py +++ b/src/dbt_osmosis/core/column_level_knowledge.py @@ -5,6 +5,11 @@ Knowledge = Dict[str, ColumnLevelKnowledge] +def delete_if_value_is_empty(prior_knowledge: ColumnLevelKnowledge, key: str) -> None: + if not prior_knowledge[key]: + del prior_knowledge[key] + + def get_prior_knowledge( knowledge: Knowledge, column: str, diff --git a/src/dbt_osmosis/core/column_level_knowledge_propagator.py b/src/dbt_osmosis/core/column_level_knowledge_propagator.py index 2e1606a..dc71dbf 100644 --- a/src/dbt_osmosis/core/column_level_knowledge_propagator.py +++ b/src/dbt_osmosis/core/column_level_knowledge_propagator.py @@ -3,6 +3,7 @@ from dbt_osmosis.core.column_level_knowledge import ( ColumnLevelKnowledge, Knowledge, + delete_if_value_is_empty, get_prior_knowledge, ) from dbt_osmosis.core.log_controller import logger @@ -88,6 +89,50 @@ def get_node_columns_with_inherited_knowledge( knowledge = _inherit_column_level_knowledge(manifest, family_tree, placeholders) return knowledge + @staticmethod + def _get_original_knowledge(node: ManifestNode, column: str) -> ColumnLevelKnowledge: + original_knowledge: ColumnLevelKnowledge = { + "description": None, + "tags": set(), + "meta": {}, + } + if column in node.columns: + original_knowledge["description"] = node.columns[column].description + original_knowledge["meta"] = node.columns[column].meta + original_knowledge["tags"] = node.columns[column].tags + return original_knowledge + + @staticmethod + def _merge_prior_knowledge_with_original_knowledge( + prior_knowledge: ColumnLevelKnowledge, + original_knowledge: ColumnLevelKnowledge, + add_progenitor_to_meta: bool, + progenitor: str, + ) -> None: + if "tags" in prior_knowledge: + prior_knowledge["tags"] = list( + set(prior_knowledge["tags"] + list(original_knowledge["tags"])) + ) + else: + prior_knowledge["tags"] = original_knowledge["tags"] + + if "meta" in prior_knowledge: + prior_knowledge["meta"] = { + **original_knowledge["meta"], + **prior_knowledge["meta"], + } + else: + prior_knowledge["meta"] = original_knowledge["meta"] + + if add_progenitor_to_meta and progenitor: + prior_knowledge["meta"]["osmosis_progenitor"] = progenitor + + if original_knowledge["meta"].get("osmosis_keep_description", None): + prior_knowledge["description"] = original_knowledge["description"] + + for k in ["tags", "meta"]: + delete_if_value_is_empty(prior_knowledge, k) + @staticmethod def update_undocumented_columns_with_prior_knowledge( undocumented_columns: Iterable[str], @@ -110,12 +155,19 @@ def update_undocumented_columns_with_prior_knowledge( for column in undocumented_columns: prior_knowledge: ColumnLevelKnowledge = get_prior_knowledge(knowledge, column) progenitor = prior_knowledge.pop("progenitor", None) - prior_knowledge = {k: v for k, v in prior_knowledge.items() if k in inheritables} - if add_progenitor_to_meta and progenitor: - prior_knowledge.setdefault("meta", {}) - prior_knowledge["meta"]["osmosis_progenitor"] = progenitor + prior_knowledge: ColumnLevelKnowledge = { + k: v for k, v in prior_knowledge.items() if k in inheritables + } + + 
ColumnLevelKnowledgePropagator._merge_prior_knowledge_with_original_knowledge( + prior_knowledge, + ColumnLevelKnowledgePropagator._get_original_knowledge(node, column), + add_progenitor_to_meta, + progenitor, + ) if not prior_knowledge: continue + if column not in node.columns: node.columns[column] = ColumnInfo.from_dict({"name": column, **prior_knowledge}) else: diff --git a/tests/test_column_level_knowledge_propagator.py b/tests/test_column_level_knowledge_propagator.py index 55f8e47..3d7dc24 100644 --- a/tests/test_column_level_knowledge_propagator.py +++ b/tests/test_column_level_knowledge_propagator.py @@ -135,7 +135,18 @@ def test_update_undocumented_columns_with_prior_knowledge(): "my_tag2", ] - target_node = manifest.nodes["model.jaffle_shop_duckdb.customers"] + target_node_name = "model.jaffle_shop_duckdb.customers" + manifest.nodes[target_node_name].columns["customer_id"].tags = set( + [ + "my_tag3", + "my_tag4", + ] + ) + manifest.nodes[target_node_name].columns["customer_id"].meta = { + "my_key": "my_old_value", + "my_new_key": "my_new_value", + } + target_node = manifest.nodes[target_node_name] knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge( manifest, target_node, placeholders=[""] ) @@ -161,12 +172,22 @@ def test_update_undocumented_columns_with_prior_knowledge(): assert ( yaml_file_model_section["columns"][0]["description"] == "THIS COLUMN IS UPDATED FOR TESTING" ) - assert yaml_file_model_section["columns"][0]["meta"] == {"my_key": "my_value"} - assert set(yaml_file_model_section["columns"][0]["tags"]) == set(["my_tag1", "my_tag2"]) + assert yaml_file_model_section["columns"][0]["meta"] == { + "my_key": "my_value", + "my_new_key": "my_new_value", + } + assert set(yaml_file_model_section["columns"][0]["tags"]) == set( + ["my_tag1", "my_tag2", "my_tag3", "my_tag4"] + ) assert target_node.columns["customer_id"].description == "THIS COLUMN IS UPDATED FOR TESTING" - assert target_node.columns["customer_id"].meta == {"my_key": "my_value"} - assert set(target_node.columns["customer_id"].tags) == set(["my_tag1", "my_tag2"]) + assert target_node.columns["customer_id"].meta == { + "my_key": "my_value", + "my_new_key": "my_new_value", + } + assert set(target_node.columns["customer_id"].tags) == set( + ["my_tag1", "my_tag2", "my_tag3", "my_tag4"] + ) def test_update_undocumented_columns_with_prior_knowledge_skip_add_tags(): @@ -314,3 +335,139 @@ def test_update_undocumented_columns_with_prior_knowledge_add_progenitor_to_meta "osmosis_progenitor": "model.jaffle_shop_duckdb.stg_customers", } assert set(target_node.columns["customer_id"].tags) == set(["my_tag1", "my_tag2"]) + + +def test_update_undocumented_columns_with_prior_knowledge_with_osmosis_keep_description(): + manifest = load_manifest() + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns[ + "customer_id" + ].description = "THIS COLUMN IS UPDATED FOR TESTING" + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["customer_id"].meta = { + "my_key": "my_value", + } + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["customer_id"].tags = [ + "my_tag1", + "my_tag2", + ] + + column_description_not_updated = ( + "This column will not be updated as it has the 'osmosis_keep_description' attribute" + ) + target_node_name = "model.jaffle_shop_duckdb.customers" + + manifest.nodes[target_node_name].columns[ + "customer_id" + ].description = column_description_not_updated + manifest.nodes[target_node_name].columns["customer_id"].tags = set( + [ + "my_tag3", + 
"my_tag4", + ] + ) + manifest.nodes[target_node_name].columns["customer_id"].meta = { + "my_key": "my_value", + "osmosis_keep_description": True, + } + + target_node = manifest.nodes[target_node_name] + knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge( + manifest, target_node, placeholders=[""] + ) + yaml_file_model_section = { + "columns": [ + { + "name": "customer_id", + } + ] + } + undocumented_columns = target_node.columns.keys() + ColumnLevelKnowledgePropagator.update_undocumented_columns_with_prior_knowledge( + undocumented_columns, + target_node, + yaml_file_model_section, + knowledge, + skip_add_tags=True, + skip_merge_meta=True, + add_progenitor_to_meta=False, + ) + + assert yaml_file_model_section["columns"][0]["name"] == "customer_id" + assert yaml_file_model_section["columns"][0]["description"] == column_description_not_updated + assert yaml_file_model_section["columns"][0]["meta"] == { + "my_key": "my_value", + "osmosis_keep_description": True, + } + assert set(yaml_file_model_section["columns"][0]["tags"]) == set(["my_tag3", "my_tag4"]) + + assert target_node.columns["customer_id"].description == column_description_not_updated + assert target_node.columns["customer_id"].meta == { + "my_key": "my_value", + "osmosis_keep_description": True, + } + assert set(target_node.columns["customer_id"].tags) == set(["my_tag3", "my_tag4"]) + + +def test_update_undocumented_columns_with_prior_knowledge_add_progenitor_to_meta_and_osmosis_keep_description(): + manifest = load_manifest() + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns[ + "customer_id" + ].description = "THIS COLUMN IS UPDATED FOR TESTING" + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["customer_id"].meta = { + "my_key": "my_value", + } + manifest.nodes["model.jaffle_shop_duckdb.stg_customers"].columns["customer_id"].tags = [ + "my_tag1", + "my_tag2", + ] + + column_description_not_updated = ( + "This column will not be updated as it has the 'osmosis_keep_description' attribute" + ) + target_node_name = "model.jaffle_shop_duckdb.customers" + + manifest.nodes[target_node_name].columns[ + "customer_id" + ].description = column_description_not_updated + manifest.nodes[target_node_name].columns["customer_id"].meta = { + "my_key": "my_value", + "osmosis_keep_description": True, + } + + target_node = manifest.nodes[target_node_name] + knowledge = ColumnLevelKnowledgePropagator.get_node_columns_with_inherited_knowledge( + manifest, target_node, placeholders=[""] + ) + yaml_file_model_section = { + "columns": [ + { + "name": "customer_id", + } + ] + } + undocumented_columns = target_node.columns.keys() + ColumnLevelKnowledgePropagator.update_undocumented_columns_with_prior_knowledge( + undocumented_columns, + target_node, + yaml_file_model_section, + knowledge, + skip_add_tags=False, + skip_merge_meta=False, + add_progenitor_to_meta=True, + ) + + assert yaml_file_model_section["columns"][0]["name"] == "customer_id" + assert yaml_file_model_section["columns"][0]["description"] == column_description_not_updated + assert yaml_file_model_section["columns"][0]["meta"] == { + "my_key": "my_value", + "osmosis_keep_description": True, + "osmosis_progenitor": "model.jaffle_shop_duckdb.stg_customers", + } + assert set(yaml_file_model_section["columns"][0]["tags"]) == set(["my_tag1", "my_tag2"]) + + assert target_node.columns["customer_id"].description == column_description_not_updated + assert target_node.columns["customer_id"].meta == { + "my_key": "my_value", + 
"osmosis_keep_description": True, + "osmosis_progenitor": "model.jaffle_shop_duckdb.stg_customers", + } + assert set(target_node.columns["customer_id"].tags) == set(["my_tag1", "my_tag2"]) From 21672a77cd5f73932cfa35bd3f0dd37f492c293f Mon Sep 17 00:00:00 2001 From: Artem Chernov Date: Fri, 27 Oct 2023 10:11:26 +0300 Subject: [PATCH 3/8] Add identifier property support for source entity type. (#114) --- src/dbt_osmosis/core/osmosis.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py index 389d4b5..9b12ed4 100644 --- a/src/dbt_osmosis/core/osmosis.py +++ b/src/dbt_osmosis/core/osmosis.py @@ -287,6 +287,8 @@ def get_target_schema_path(self, node: ManifestNode) -> Path: @staticmethod def get_catalog_key(node: ManifestNode) -> CatalogKey: """Returns CatalogKey for a given node.""" + if node.resource_type == NodeType.Source: + return CatalogKey(node.database, node.schema, getattr(node, "identifier", node.name)) return CatalogKey(node.database, node.schema, getattr(node, "alias", node.name)) def get_base_model(self, node: ManifestNode) -> Dict[str, Any]: From ede5a68f6c1f1b9ce2cc3453e63bd4c10dec8106 Mon Sep 17 00:00:00 2001 From: yassun7010 <47286750+yassun7010@users.noreply.github.com> Date: Thu, 7 Dec 2023 18:02:19 +0900 Subject: [PATCH 4/8] feat: load column description from db. (#120) --- src/dbt_osmosis/core/osmosis.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py index 9b12ed4..811c923 100644 --- a/src/dbt_osmosis/core/osmosis.py +++ b/src/dbt_osmosis/core/osmosis.py @@ -359,7 +359,10 @@ def get_columns_meta(self, catalog_key: CatalogKey) -> Dict[str, ColumnMetadata] if any(re.match(pattern, col.name) for pattern in blacklist): continue columns[self.column_casing(col.name)] = ColumnMetadata( - name=self.column_casing(col.name), type=col.type, index=col.index + name=self.column_casing(col.name), + type=col.type, + index=col.index, + comment=col.comment, ) else: return columns @@ -381,14 +384,20 @@ def get_columns_meta(self, catalog_key: CatalogKey) -> Dict[str, ColumnMetadata] if any(re.match(pattern, c.name) for pattern in blacklist): continue columns[self.column_casing(c.name)] = ColumnMetadata( - name=self.column_casing(c.name), type=c.dtype, index=None + name=self.column_casing(c.name), + type=c.dtype, + index=None, + comment=getattr(c, "comment", None), ) if hasattr(c, "flatten"): for exp in c.flatten(): if any(re.match(pattern, exp.name) for pattern in blacklist): continue columns[self.column_casing(exp.name)] = ColumnMetadata( - name=self.column_casing(exp.name), type=c.dtype, index=None + name=self.column_casing(exp.name), + type=c.dtype, + index=None, + comment=getattr(c, "comment", None), ) except Exception as error: logger().info( @@ -1003,10 +1012,18 @@ def add_missing_cols_to_node_and_model( changes_committed = 0 for column in missing_columns: node.columns[column] = ColumnInfo.from_dict( - {"name": column, "description": "", "data_type": columns_db_meta[column].type} + { + "name": column, + "description": columns_db_meta[column].comment or "", + "data_type": columns_db_meta[column].type, + } ) yaml_file_model_section.setdefault("columns", []).append( - {"name": column, "data_type": columns_db_meta[column].type, "description": ""} + { + "name": column, + "description": columns_db_meta[column].comment or "", + "data_type": columns_db_meta[column].type, + } ) changes_committed += 1 logger().info( From 
784fa82e92bd2caffbc9d51bbc2ce4cb083324e7 Mon Sep 17 00:00:00 2001 From: Armaan Dhull Date: Thu, 7 Dec 2023 04:04:39 -0500 Subject: [PATCH 5/8] add sources only if available (#118) --- src/dbt_osmosis/core/osmosis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py index 811c923..f27964a 100644 --- a/src/dbt_osmosis/core/osmosis.py +++ b/src/dbt_osmosis/core/osmosis.py @@ -686,10 +686,10 @@ def commit_project_restructure_to_disk( target_schema = {"version": 2} elif "version" not in target_schema: target_schema["version"] = 2 - # Add models and sources to target schema + # Add models and sources (if available) to target schema if structure.output["models"]: target_schema.setdefault("models", []).extend(structure.output["models"]) - if structure.output["sources"]: + if structure.output.get("sources") is not None: target_schema.setdefault("sources", []).extend(structure.output["sources"]) if not self.dry_run: self.yaml_handler.dump(target_schema, target) From 5812254bf82e39cc1a88194247d04f560ea8023a Mon Sep 17 00:00:00 2001 From: z3z1ma Date: Sat, 23 Dec 2023 20:56:38 -0700 Subject: [PATCH 6/8] feat: enable setting encoding in yaml handler via env var --- src/dbt_osmosis/core/osmosis.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py index f27964a..76e15ab 100644 --- a/src/dbt_osmosis/core/osmosis.py +++ b/src/dbt_osmosis/core/osmosis.py @@ -35,6 +35,7 @@ def __init__(self, **kwargs) -> None: self.width = 800 self.preserve_quotes = True self.default_flow_style = False + self.encoding = os.getenv("DBT_OSMOSIS_ENCODING", "utf-8") class SchemaFileLocation(BaseModel): From 187a7db85b390418950b4aea123307b5bd9ad8c8 Mon Sep 17 00:00:00 2001 From: Buu NGUYEN <17428690+buu-nguyen@users.noreply.github.com> Date: Sat, 6 Jan 2024 21:53:37 +0700 Subject: [PATCH 7/8] Get the data_type and comment of child column instead of its parent (#124) --- src/dbt_osmosis/core/osmosis.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dbt_osmosis/core/osmosis.py b/src/dbt_osmosis/core/osmosis.py index 76e15ab..cadf056 100644 --- a/src/dbt_osmosis/core/osmosis.py +++ b/src/dbt_osmosis/core/osmosis.py @@ -396,9 +396,9 @@ def get_columns_meta(self, catalog_key: CatalogKey) -> Dict[str, ColumnMetadata] continue columns[self.column_casing(exp.name)] = ColumnMetadata( name=self.column_casing(exp.name), - type=c.dtype, + type=exp.dtype, index=None, - comment=getattr(c, "comment", None), + comment=getattr(exp, "comment", None), ) except Exception as error: logger().info( From 7a0c63ed8a0ca04d2b9898c3b2a4ad66a7a0d83c Mon Sep 17 00:00:00 2001 From: z3z1ma Date: Sun, 7 Jan 2024 10:56:12 -0500 Subject: [PATCH 8/8] chore: new release --- .changes/0.12.5.md | 10 ++ CHANGELOG.md | 11 ++ flake.lock | 325 --------------------------------------------- flake.nix | 79 ----------- pyproject.toml | 2 +- 5 files changed, 22 insertions(+), 405 deletions(-) create mode 100644 .changes/0.12.5.md delete mode 100644 flake.lock delete mode 100644 flake.nix diff --git a/.changes/0.12.5.md b/.changes/0.12.5.md new file mode 100644 index 0000000..edaf80a --- /dev/null +++ b/.changes/0.12.5.md @@ -0,0 +1,10 @@ +## 0.12.5 - 2024-01-07 +### Added +* more typing to improve code readability +* Keep column description if meta.osmosis_keep_description is True +* Add identifier property support for source entity type. 
(#114) +* feat: load column description from db +* enable setting encoding in yaml handler via env var +### Fixed +* add sources only if available +* Get the data_type and comment of child column instead of its parent diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a2b942..13316e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,17 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), and is generated by [Changie](https://github.com/miniscruff/changie). +## 0.12.5 - 2024-01-07 +### Added +* more typing to improve code readability +* Keep column description if meta.osmosis_keep_description is True +* Add identifier property support for source entity type. (#114) +* feat: load column description from db +* enable setting encoding in yaml handler via env var +### Fixed +* add sources only if available +* Get the data_type and comment of child column instead of its parent + ## 0.12.4 - 2023-09-22 ### Added * enabled more pre-commit hooks diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 474c427..0000000 --- a/flake.lock +++ /dev/null @@ -1,325 +0,0 @@ -{ - "nodes": { - "devenv": { - "inputs": { - "flake-compat": "flake-compat", - "nix": "nix", - "nixpkgs": "nixpkgs", - "pre-commit-hooks": "pre-commit-hooks" - }, - "locked": { - "lastModified": 1695196688, - "narHash": "sha256-4C+HSXd69qQe12m8Pc6PgLmJGDKURcqkhVdwRu0xrBw=", - "owner": "cachix", - "repo": "devenv", - "rev": "d3b37c5d6fce8919b9a86c4a3bcacd2d0534341a", - "type": "github" - }, - "original": { - "owner": "cachix", - "repo": "devenv", - "type": "github" - } - }, - "flake-compat": { - "flake": false, - "locked": { - "lastModified": 1673956053, - "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, - "flake-parts": { - "inputs": { - "nixpkgs-lib": "nixpkgs-lib" - }, - "locked": { - "lastModified": 1693611461, - "narHash": "sha256-aPODl8vAgGQ0ZYFIRisxYG5MOGSkIczvu2Cd8Gb9+1Y=", - "owner": "hercules-ci", - "repo": "flake-parts", - "rev": "7f53fdb7bdc5bb237da7fefef12d099e4fd611ca", - "type": "github" - }, - "original": { - "id": "flake-parts", - "type": "indirect" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1685518550, - "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_2": { - "locked": { - "lastModified": 1653893745, - "narHash": "sha256-0jntwV3Z8//YwuOjzhV2sgJJPt+HY6KhU7VZUL0fKZQ=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "1ed9fb1935d260de5fe1c2f7ee0ebaae17ed2fa1", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "gitignore": { - "inputs": { - "nixpkgs": [ - "devenv", - "pre-commit-hooks", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1660459072, - "narHash": "sha256-8DFJjXG8zqoONA1vXtgeKXy68KdJL5UaXR8NtVMUbx8=", - "owner": "hercules-ci", - "repo": "gitignore.nix", - "rev": "a20de23b925fd8264fd7fad6454652e142fd7f73", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "gitignore.nix", - "type": "github" - } - }, - "lowdown-src": { - "flake": false, - 
"locked": { - "lastModified": 1633514407, - "narHash": "sha256-Dw32tiMjdK9t3ETl5fzGrutQTzh2rufgZV4A/BbxuD4=", - "owner": "kristapsdz", - "repo": "lowdown", - "rev": "d2c2b44ff6c27b936ec27358a2653caaef8f73b8", - "type": "github" - }, - "original": { - "owner": "kristapsdz", - "repo": "lowdown", - "type": "github" - } - }, - "mk-shell-bin": { - "locked": { - "lastModified": 1677004959, - "narHash": "sha256-/uEkr1UkJrh11vD02aqufCxtbF5YnhRTIKlx5kyvf+I=", - "owner": "rrbutani", - "repo": "nix-mk-shell-bin", - "rev": "ff5d8bd4d68a347be5042e2f16caee391cd75887", - "type": "github" - }, - "original": { - "owner": "rrbutani", - "repo": "nix-mk-shell-bin", - "type": "github" - } - }, - "nix": { - "inputs": { - "lowdown-src": "lowdown-src", - "nixpkgs": [ - "devenv", - "nixpkgs" - ], - "nixpkgs-regression": "nixpkgs-regression" - }, - "locked": { - "lastModified": 1676545802, - "narHash": "sha256-EK4rZ+Hd5hsvXnzSzk2ikhStJnD63odF7SzsQ8CuSPU=", - "owner": "domenkozar", - "repo": "nix", - "rev": "7c91803598ffbcfe4a55c44ac6d49b2cf07a527f", - "type": "github" - }, - "original": { - "owner": "domenkozar", - "ref": "relaxed-flakes", - "repo": "nix", - "type": "github" - } - }, - "nix2container": { - "inputs": { - "flake-utils": "flake-utils_2", - "nixpkgs": [ - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1688922987, - "narHash": "sha256-RnQwrCD5anqWfyDAVbfFIeU+Ha6cwt5QcIwIkaGRzQw=", - "owner": "nlewo", - "repo": "nix2container", - "rev": "ab381a7d714ebf96a83882264245dbd34f0a7ec8", - "type": "github" - }, - "original": { - "owner": "nlewo", - "repo": "nix2container", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1678875422, - "narHash": "sha256-T3o6NcQPwXjxJMn2shz86Chch4ljXgZn746c2caGxd8=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "126f49a01de5b7e35a43fd43f891ecf6d3a51459", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixpkgs-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs-lib": { - "locked": { - "dir": "lib", - "lastModified": 1693471703, - "narHash": "sha256-0l03ZBL8P1P6z8MaSDS/MvuU8E75rVxe5eE1N6gxeTo=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "3e52e76b70d5508f3cec70b882a29199f4d1ee85", - "type": "github" - }, - "original": { - "dir": "lib", - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs-regression": { - "locked": { - "lastModified": 1643052045, - "narHash": "sha256-uGJ0VXIhWKGXxkeNnq4TvV3CIOkUJ3PAoLZ3HMzNVMw=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2", - "type": "github" - }, - "original": { - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2", - "type": "github" - } - }, - "nixpkgs-stable": { - "locked": { - "lastModified": 1685801374, - "narHash": "sha256-otaSUoFEMM+LjBI1XL/xGB5ao6IwnZOXc47qhIgJe8U=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "c37ca420157f4abc31e26f436c1145f8951ff373", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-23.05", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs_2": { - "locked": { - "lastModified": 1694959747, - "narHash": "sha256-CXQ2MuledDVlVM5dLC4pB41cFlBWxRw4tCBsFrq3cRk=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "970a59bd19eff3752ce552935687100c46e820a5", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "pre-commit-hooks": { - "inputs": { - "flake-compat": [ - "devenv", - 
"flake-compat" - ], - "flake-utils": "flake-utils", - "gitignore": "gitignore", - "nixpkgs": [ - "devenv", - "nixpkgs" - ], - "nixpkgs-stable": "nixpkgs-stable" - }, - "locked": { - "lastModified": 1688056373, - "narHash": "sha256-2+SDlNRTKsgo3LBRiMUcoEUb6sDViRNQhzJquZ4koOI=", - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "rev": "5843cf069272d92b60c3ed9e55b7a8989c01d4c7", - "type": "github" - }, - "original": { - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "type": "github" - } - }, - "root": { - "inputs": { - "devenv": "devenv", - "flake-parts": "flake-parts", - "mk-shell-bin": "mk-shell-bin", - "nix2container": "nix2container", - "nixpkgs": "nixpkgs_2" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/flake.nix b/flake.nix deleted file mode 100644 index eea1354..0000000 --- a/flake.nix +++ /dev/null @@ -1,79 +0,0 @@ -{ - description = "dbt-osmosis dev environment"; - - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - devenv.url = "github:cachix/devenv"; - nix2container.url = "github:nlewo/nix2container"; - nix2container.inputs.nixpkgs.follows = "nixpkgs"; - mk-shell-bin.url = "github:rrbutani/nix-mk-shell-bin"; - }; - - nixConfig = { - extra-trusted-public-keys = - "devenv.cachix.org-1:w1cLUi8dv3hnoSPGAuibQv+f9TZLr6cv/Hm9XgU50cw="; - extra-substituters = "https://devenv.cachix.org"; - }; - - outputs = inputs@{ flake-parts, ... }: - flake-parts.lib.mkFlake { inherit inputs; } { - imports = [ inputs.devenv.flakeModule ]; - systems = [ - "x86_64-linux" - "i686-linux" - "x86_64-darwin" - "aarch64-linux" - "aarch64-darwin" - ]; - - perSystem = { config, self', inputs', pkgs, system, ... }: { - devenv.shells.default = { - name = "dbt-osmosis"; - imports = [ ]; - - # Base environment - dotenv.disableHint = true; - - # Base packages - packages = [ - pkgs.python310 - pkgs.black - pkgs.isort - pkgs.jq - pkgs.yq - pkgs.ruff - pkgs.nixfmt - pkgs.poetry - ]; - - # Utilities - scripts.fmt.exec = '' - echo "Formatting..." - ${pkgs.black}/bin/black src - ${pkgs.isort}/bin/isort src - ${pkgs.nixfmt}/bin/nixfmt flake.nix - ''; - scripts.lint.exec = '' - echo "Linting..." - ${pkgs.ruff}/bin/ruff --fix src/**/*.py - ''; - - # Activate venv on shell enter - enterShell = '' - PROJECT_ROOT=$(git rev-parse --show-toplevel) - echo Setting up Python virtual environment... - [ -d $PROJECT_ROOT/.venv ] || python -m venv $PROJECT_ROOT/.venv - export PATH="$PROJECT_ROOT/.venv/bin:$PATH" - ${pkgs.poetry}/bin/poetry install - eval "$(${pkgs.poetry}/bin/poetry env info --path)/bin/activate" - ''; - - # Languages - languages.nix.enable = true; - languages.python.enable = true; - - }; - }; - flake = { }; - }; -} diff --git a/pyproject.toml b/pyproject.toml index 64d5867..b25b333 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dbt-osmosis" -version = "0.12.4" +version = "0.12.5" description = "A dbt server and suite of optional developer tools to make developing with dbt delightful." authors = ["z3z1ma "] license = "Apache-2.0"