From 7efee1995870c2eca6cde385bdb19017a9b207c7 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Thu, 7 Sep 2023 15:21:27 -0500 Subject: [PATCH 01/53] init push of ADAP-394 --- .../bigquery/relation_configs/__init__.py | 0 .../relation_configs/materialized_view.py | 64 +++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 dbt/adapters/bigquery/relation_configs/__init__.py create mode 100644 dbt/adapters/bigquery/relation_configs/materialized_view.py diff --git a/dbt/adapters/bigquery/relation_configs/__init__.py b/dbt/adapters/bigquery/relation_configs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py new file mode 100644 index 000000000..4a4014d46 --- /dev/null +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -0,0 +1,64 @@ +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Union +from dbt.adapters.relation_configs.config_base import RelationConfigBase +from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin +from dbt.contracts.graph.nodes import ModelNode + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryMaterializedViewConfig(RelationConfigBase, RelationConfigValidationMixin): + """ + This config follow the specs found here: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_materialized_view_statement + + The following parameters are configurable by dbt: + - materialized_view_name: Name of the materialized view + - schema: Dataset name of the materialized view + - database: Project name of the database + - cluster_by: A comma-seperated list of of col references to determine cluster. + - Note: Can contain up to four colms in list. + - partition_by: Expression to describe how to partition materialized view. 
+ - Note: Must be partitioned in the same was as base table is partitioned. + - enable_refresh: Enables autoamtic refresh of materialized view when base table is + updated. + - refresh_interval_minutes: frequency at which a materialized view will be refeshed. + - Note: (default is 30 minutes) + - hours_to_expiration: The time when table expires. + - Note: If not set table persists + - max_staleness: if the last refresh is within max_staleness interval, + BigQuery returns data directly from the materialized view without reading base table. + Otherwise it reads from the base to return results withing the staleness interval. + - allow_non_incremental_definition: + - kms_key_name: user defined Cloud KMS encryption key. + - friendly_name: A descriptive name for this table. + - description: A user-friendly description of this table. + - labels: used to organized and group table + - Note on usage can be found + + There are currently no non-configurable parameters. + """ + + materialized_view_name: str + schema_name: str + database_name: str + cluster_by: Optional[Union[List[str], str]] = None + partition_by: Optional[Dict[str, Any]] = None + enable_refresh: bool = True + refresh_interval_minutes: float = 30 + hours_to_expiration: Optional[int] = None + max_staleness: Optional[int] = None + allow_non_incremental_definition: Optional[bool] = None + kms_key_name: Optional[str] = None + friendly_name: Optional[str] = None + description: Optional[str] = None + labels: Optional[Dict[str, str]] = None + + @classmethod + def parse_model_node(cls, model_node: ModelNode) -> dict: + config_dict = { + "materialized_view_name": model_node.identifier, + "schema_name": model_node.schema, + "database_name": model_node.database, + } + + return config_dict From aed5f998d562666b7230681231b3d74fd4f91ed7 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Tue, 12 Sep 2023 16:23:05 -0500 Subject: [PATCH 02/53] update lots of framework and update to main --- .flake8 | 2 + 
dbt/adapters/bigquery/impl.py | 3 + dbt/adapters/bigquery/relation.py | 3 + .../bigquery/relation_configs/__init__.py | 8 ++ .../bigquery/relation_configs/base.py | 62 ++++++++++ .../relation_configs/materialized_view.py | 111 +++++++++++++++++- .../bigquery/relation_configs/policies.py | 16 +++ 7 files changed, 200 insertions(+), 5 deletions(-) create mode 100644 dbt/adapters/bigquery/relation_configs/base.py create mode 100644 dbt/adapters/bigquery/relation_configs/policies.py diff --git a/.flake8 b/.flake8 index 11baa8ee0..da7e039fd 100644 --- a/.flake8 +++ b/.flake8 @@ -12,3 +12,5 @@ ignore = E741, E501, exclude = tests +per-file-ignores = + */__init__.py: F401 diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 3f8351861..fc147fcd1 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -190,6 +190,9 @@ class BigqueryConfig(AdapterConfig): require_partition_filter: Optional[bool] = None partition_expiration_days: Optional[int] = None merge_update_columns: Optional[str] = None + enable_refresh: Optional[bool] = True + refresh_interval_minutes: Optional[int] = 30 + max_staleness: Optional[str] = None class BigQueryAdapter(BaseAdapter): diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index d49677168..cad82ae2b 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -4,6 +4,7 @@ from itertools import chain, islice from dbt.adapters.base.relation import BaseRelation, ComponentName, InformationSchema +from dbt.adapters.bigquery.relation_configs import BigQueryIncludePolicy, BigQueryQuotePolicy from dbt.exceptions import CompilationError from dbt.utils import filter_null_values from typing import TypeVar @@ -16,6 +17,8 @@ class BigQueryRelation(BaseRelation): quote_character: str = "`" location: Optional[str] = None + include_policy = BigQueryIncludePolicy # type: ignore + quote_policy = BigQueryQuotePolicy # type: ignore def matches( self, diff 
--git a/dbt/adapters/bigquery/relation_configs/__init__.py b/dbt/adapters/bigquery/relation_configs/__init__.py index e69de29bb..967befa6b 100644 --- a/dbt/adapters/bigquery/relation_configs/__init__.py +++ b/dbt/adapters/bigquery/relation_configs/__init__.py @@ -0,0 +1,8 @@ +from dbt.adapters.bigquery.relation_configs.materialized_view import ( + BigQueryMaterializedViewConfig, + BigQueryMaterializedViewConfigChangeset, +) +from dbt.adapters.bigquery.relation_configs.policies import ( + BigQueryIncludePolicy, + BigQueryQuotePolicy, +) diff --git a/dbt/adapters/bigquery/relation_configs/base.py b/dbt/adapters/bigquery/relation_configs/base.py new file mode 100644 index 000000000..9cf44345d --- /dev/null +++ b/dbt/adapters/bigquery/relation_configs/base.py @@ -0,0 +1,62 @@ +from dataclasses import dataclass +from typing import Optional + +import agate +from dbt.adapters.base.relation import Policy +from dbt.adapters.relation_configs import RelationConfigBase, RelationResults +from dbt.adapters.bigquery.relation_configs.policies import ( + BigQueryIncludePolicy, + BigQueryQuotePolicy, +) +from dbt.contracts.graph.nodes import ModelNode +from dbt.contracts.relation import ComponentName + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryReleationConfigBase(RelationConfigBase): + @classmethod + def include_policy(cls) -> Policy: + return BigQueryIncludePolicy() + + @classmethod + def quote_policy(cls) -> Policy: + return BigQueryQuotePolicy() + + @classmethod + def from_model_node(cls, model_node: ModelNode) -> "RelationConfigBase": + relation_config = cls.parse_model_node(model_node) + relation = cls.from_dict(relation_config) + return relation + + @classmethod + def parse_model_node(cls, model_node: ModelNode) -> dict: + raise NotImplementedError( + "`parse_model_node()` needs to be implemented on this RelationConfigBase instance" + ) + + @classmethod + def from_relation_results(cls, relation_results: RelationResults) -> "RelationConfigBase": + 
relation_config = cls.parse_relation_results(relation_results) + relation = cls.from_dict(relation_config) + return relation + + @classmethod + def parse_relation_results(cls, relation_results: RelationResults) -> dict: + raise NotImplementedError( + "`parse_relation_results()` needs to be implemented on this RelationConfigBase instance" + ) + + @classmethod + def _render_part(cls, component: ComponentName, value: Optional[str]) -> Optional[str]: + if cls.include_policy().get_part(component) and value: + if cls.quote_policy().get_part(component): + return f'"{value}"' + return value.lower() + return None + + @classmethod + def _get_first_row(cls, results: agate.Table) -> agate.Row: + try: + return results.rows[0] + except IndexError: + return agate.Row(values=set()) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 4a4014d46..598ecc5cb 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,12 +1,16 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional, Union -from dbt.adapters.relation_configs.config_base import RelationConfigBase + +import agate +from dbt.adapters.relation_configs.config_base import RelationResults from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin from dbt.contracts.graph.nodes import ModelNode +from dbt.contracts.relation import ComponentName +from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryMaterializedViewConfig(RelationConfigBase, RelationConfigValidationMixin): +class BigQueryMaterializedViewConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): """ This config follow the specs found here: 
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_materialized_view_statement @@ -43,22 +47,119 @@ class BigQueryMaterializedViewConfig(RelationConfigBase, RelationConfigValidatio database_name: str cluster_by: Optional[Union[List[str], str]] = None partition_by: Optional[Dict[str, Any]] = None - enable_refresh: bool = True - refresh_interval_minutes: float = 30 + partition_expiration_date: Optional[int] = None + enable_refresh: Optional[bool] = True + refresh_interval_minutes: Optional[int] = 30 hours_to_expiration: Optional[int] = None - max_staleness: Optional[int] = None + max_staleness: Optional[str] = None allow_non_incremental_definition: Optional[bool] = None kms_key_name: Optional[str] = None friendly_name: Optional[str] = None description: Optional[str] = None labels: Optional[Dict[str, str]] = None + @classmethod + def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": + kwargs_dict = { + "materialized_view_name": cls._render_part( + ComponentName.Identifier, config_dict.get("materialized_view_name") + ), + "schema_name": cls._render_part(ComponentName.Schema, config_dict.get("schema_name")), + "database_name": cls._render_part( + ComponentName.Database, config_dict.get("database_name") + ), + "cluster_by": config_dict.get("cluster_by"), + "partition_by": config_dict.get("partition_by"), + "enable_refresh": config_dict.get("enabled_refresh"), + "refresh_interval_minutes": config_dict.get("refresh_interval_minutes"), + "hours_to_expiration": config_dict.get("hours_to_expiration"), + "max_staleness": config_dict.get("max_staleness"), + "allow_non_incremental_definition": config_dict.get( + "allow_non_incremental_definition" + ), + "kms_key_name": config_dict.get("kms_key_name"), + "friendly_name": config_dict.get("friendly_name"), + "description": config_dict.get("description"), + "labels": config_dict.get("labels"), + } + + materialized_view: "BigQueryMaterializedViewConfig" = 
super().from_dict(kwargs_dict) # type: ignore + return materialized_view + @classmethod def parse_model_node(cls, model_node: ModelNode) -> dict: config_dict = { "materialized_view_name": model_node.identifier, "schema_name": model_node.schema, "database_name": model_node.database, + "cluster_by": model_node.config.extra.get("cluster_by"), + "partition_by": model_node.config.extra.get("partition_by"), + "partition_expiration_date": model_node.config.extra.get("partition_expiration_date"), + "refresh_interval_minutes": model_node.config.extra.get("refresh_interval_minutes"), + "hours_to_expiration": model_node.config.extra.get("hours_to_expiration"), + "max_staleness": model_node.config.extra.get("max_staleness"), + "allow_non_incremental_definition": model_node.config.extra.get( + "allow_non_incremental_definition" + ), + "kms_key_name": model_node.config.extra.get("kms_key_name"), + "friendly_name": model_node.config.extra.get("friendly_name"), + "description": model_node.config.extra.get("description"), + "labels": model_node.config.extra.get("labels"), + } + + autorefresh_value = model_node.config.extra.get("enabled_refresh") + if autorefresh_value is not None: + if isinstance(autorefresh_value, bool): + config_dict["enable_refresh"] = autorefresh_value + elif isinstance(autorefresh_value, str): + lower_autorefresh_value = autorefresh_value.lower() + if lower_autorefresh_value == "true": + config_dict["enable_refresh"] = True + elif lower_autorefresh_value == "false": + config_dict["enable_refresh"] = False + else: + raise ValueError( + "Invalide enable_refresh representation. 
Please used excepted value ex.(True, 'true', 'True')" + ) + else: + raise TypeError("Invalid autorefresh value: expecting boolean or str.") + + return config_dict + + @classmethod + def parse_relation_results(cls, relation_results: RelationResults) -> dict: + materialized_view: agate.Row = cls._get_first_row( + relation_results.get("materialized_view") # type: ignore[arg-type] + ) + + config_dict = { + "materialized_view_name": materialized_view.get("materialized_view_name"), + "schema_name": materialized_view.get("schema"), + "database_name": materialized_view.get("database"), + "cluster_by": materialized_view.get("cluster_by"), + "partition_by": materialized_view.get("partition_by"), + "enable_refresh": materialized_view.get("enabled_refresh"), + "refresh_interval_minutes": materialized_view.get("refresh_interval_minutes"), + "hours_to_expiration": materialized_view.get("hours_to_expiration"), + "max_staleness": materialized_view.get("max_staleness"), + "allow_non_incremental_definition": materialized_view.get( + "allow_non_incremental_definition" + ), + "kms_key_name": materialized_view.get("kms_key_name"), + "friendly_name": materialized_view.get("friendly_name"), + "description": materialized_view.get("description"), + "labels": materialized_view.get("labels"), } return config_dict + + +@dataclass +class BigQueryMaterializedViewConfigChangeset: + @property + def requires_full_refresh(self) -> bool: + return True + + @property + def has_changes(self) -> bool: + return True diff --git a/dbt/adapters/bigquery/relation_configs/policies.py b/dbt/adapters/bigquery/relation_configs/policies.py new file mode 100644 index 000000000..4467c4340 --- /dev/null +++ b/dbt/adapters/bigquery/relation_configs/policies.py @@ -0,0 +1,16 @@ +from dataclasses import dataclass + +from dbt.adapters.base.relation import Policy + + +class BigQueryIncludePolicy(Policy): + database: bool = True + schema: bool = True + identifier: bool = True + + +@dataclass +class 
BigQueryQuotePolicy(Policy): + database: bool = True + schema: bool = True + identifier: bool = True From b7cd870eb3f539d759a5fb987c68dc8cb7b5b230 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Wed, 13 Sep 2023 12:34:24 -0500 Subject: [PATCH 03/53] updating based on feedback --- dbt/adapters/bigquery/impl.py | 4 ++-- dbt/adapters/bigquery/relation.py | 6 +++--- .../bigquery/relation_configs/materialized_view.py | 13 ++++++++----- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index fc147fcd1..e7ba7418e 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -190,8 +190,8 @@ class BigqueryConfig(AdapterConfig): require_partition_filter: Optional[bool] = None partition_expiration_days: Optional[int] = None merge_update_columns: Optional[str] = None - enable_refresh: Optional[bool] = True - refresh_interval_minutes: Optional[int] = 30 + enable_refresh: Optional[bool] = None + refresh_interval_minutes: Optional[int] = None max_staleness: Optional[str] = None diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index cad82ae2b..942ca4e62 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional from itertools import chain, islice @@ -17,8 +17,8 @@ class BigQueryRelation(BaseRelation): quote_character: str = "`" location: Optional[str] = None - include_policy = BigQueryIncludePolicy # type: ignore - quote_policy = BigQueryQuotePolicy # type: ignore + include_policy = BigQueryIncludePolicy = field(default_factory=lambda: BigQueryIncludePolicy()) + quote_policy = BigQueryQuotePolicy = field(default_factory=lambda: BigQueryQuotePolicy()) def matches( self, diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py 
b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 598ecc5cb..08e4526b0 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -2,6 +2,7 @@ from typing import Any, Dict, List, Optional, Union import agate +from dbt.exceptions import DbtRuntimeError from dbt.adapters.relation_configs.config_base import RelationResults from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin from dbt.contracts.graph.nodes import ModelNode @@ -70,7 +71,7 @@ def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": ), "cluster_by": config_dict.get("cluster_by"), "partition_by": config_dict.get("partition_by"), - "enable_refresh": config_dict.get("enabled_refresh"), + "enable_refresh": config_dict.get("enable_refresh"), "refresh_interval_minutes": config_dict.get("refresh_interval_minutes"), "hours_to_expiration": config_dict.get("hours_to_expiration"), "max_staleness": config_dict.get("max_staleness"), @@ -107,7 +108,7 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: "labels": model_node.config.extra.get("labels"), } - autorefresh_value = model_node.config.extra.get("enabled_refresh") + autorefresh_value = model_node.config.extra.get("enable_refresh") if autorefresh_value is not None: if isinstance(autorefresh_value, bool): config_dict["enable_refresh"] = autorefresh_value @@ -128,9 +129,11 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: @classmethod def parse_relation_results(cls, relation_results: RelationResults) -> dict: - materialized_view: agate.Row = cls._get_first_row( - relation_results.get("materialized_view") # type: ignore[arg-type] - ) + materialized_view_config = relation_results.get("materialized_view") + if isinstance(materialized_view_config, agate.Table): + materialized_view = cls._get_first_row(materialized_view_config) + else: + raise DbtRuntimeError("Unsupported type returned ex. 
None") config_dict = { "materialized_view_name": materialized_view.get("materialized_view_name"), From ed0a69db1e7d5fd66d9fd302b44709d61ca55b69 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Wed, 13 Sep 2023 13:04:53 -0500 Subject: [PATCH 04/53] add changelog --- .changes/unreleased/Features-20230913-130445.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changes/unreleased/Features-20230913-130445.yaml diff --git a/.changes/unreleased/Features-20230913-130445.yaml b/.changes/unreleased/Features-20230913-130445.yaml new file mode 100644 index 000000000..65486fc26 --- /dev/null +++ b/.changes/unreleased/Features-20230913-130445.yaml @@ -0,0 +1,6 @@ +kind: Features +body: provide logic to be able to alter, or reacut to chnges to trigger autorefresh +time: 2023-09-13T13:04:45.761294-05:00 +custom: + Author: McKnight-42 + Issue: "924" From 52f5dfdfa5135ae01757d83150afe0dc4c60333d Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Thu, 14 Sep 2023 11:14:54 -0500 Subject: [PATCH 05/53] remove in preivew option --- .../bigquery/relation_configs/materialized_view.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 08e4526b0..975b40c88 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -33,7 +33,6 @@ class BigQueryMaterializedViewConfig(BigQueryReleationConfigBase, RelationConfig - max_staleness: if the last refresh is within max_staleness interval, BigQuery returns data directly from the materialized view without reading base table. Otherwise it reads from the base to return results withing the staleness interval. - - allow_non_incremental_definition: - kms_key_name: user defined Cloud KMS encryption key. - friendly_name: A descriptive name for this table. - description: A user-friendly description of this table. 
@@ -53,7 +52,6 @@ class BigQueryMaterializedViewConfig(BigQueryReleationConfigBase, RelationConfig refresh_interval_minutes: Optional[int] = 30 hours_to_expiration: Optional[int] = None max_staleness: Optional[str] = None - allow_non_incremental_definition: Optional[bool] = None kms_key_name: Optional[str] = None friendly_name: Optional[str] = None description: Optional[str] = None @@ -75,9 +73,6 @@ def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": "refresh_interval_minutes": config_dict.get("refresh_interval_minutes"), "hours_to_expiration": config_dict.get("hours_to_expiration"), "max_staleness": config_dict.get("max_staleness"), - "allow_non_incremental_definition": config_dict.get( - "allow_non_incremental_definition" - ), "kms_key_name": config_dict.get("kms_key_name"), "friendly_name": config_dict.get("friendly_name"), "description": config_dict.get("description"), @@ -99,9 +94,6 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: "refresh_interval_minutes": model_node.config.extra.get("refresh_interval_minutes"), "hours_to_expiration": model_node.config.extra.get("hours_to_expiration"), "max_staleness": model_node.config.extra.get("max_staleness"), - "allow_non_incremental_definition": model_node.config.extra.get( - "allow_non_incremental_definition" - ), "kms_key_name": model_node.config.extra.get("kms_key_name"), "friendly_name": model_node.config.extra.get("friendly_name"), "description": model_node.config.extra.get("description"), @@ -145,9 +137,6 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict: "refresh_interval_minutes": materialized_view.get("refresh_interval_minutes"), "hours_to_expiration": materialized_view.get("hours_to_expiration"), "max_staleness": materialized_view.get("max_staleness"), - "allow_non_incremental_definition": materialized_view.get( - "allow_non_incremental_definition" - ), "kms_key_name": materialized_view.get("kms_key_name"), "friendly_name": 
materialized_view.get("friendly_name"), "description": materialized_view.get("description"), From eb34007f9c214c6e1e210ff7565a722d095dfbec Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Thu, 14 Sep 2023 14:33:01 -0500 Subject: [PATCH 06/53] fill out changeset and config change classes for specific options --- .../relation_configs/materialized_view.py | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 975b40c88..d5e489f14 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -3,6 +3,7 @@ import agate from dbt.exceptions import DbtRuntimeError +from dbt.adapters.relation_configs.config_change import RelationConfigChange from dbt.adapters.relation_configs.config_base import RelationResults from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin from dbt.contracts.graph.nodes import ModelNode @@ -146,12 +147,55 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict: return config_dict +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryAutoRefreshConfigChange(RelationConfigChange): + context: Optional[bool] = None + + @property + def requires_full_refresh(self) -> bool: + return False + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryPartitionConfigChange(RelationConfigChange): + context: Optional[bool] = None + + @property + def requires_full_refresh(self) -> bool: + return True + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryClusterConfigChange(RelationConfigChange): + context: Optional[bool] = None + + @property + def requires_full_refresh(self) -> bool: + return True + + @dataclass class BigQueryMaterializedViewConfigChangeset: + partition_by: Optional[BigQueryPartitionConfigChange] = None + 
cluster_by: Optional[BigQueryClusterConfigChange] = None + auto_refresh: Optional[BigQueryAutoRefreshConfigChange] = None + @property def requires_full_refresh(self) -> bool: - return True + return any( + { + self.auto_refresh.requires_full_refresh if self.auto_refresh else False, + self.partition_by.requires_full_refresh if self.partition_by else False, + self.cluster_by.requires_full_refresh if self.cluster_by else False, + } + ) @property def has_changes(self) -> bool: - return True + return any( + { + self.partition_by if self.partition_by else False, + self.cluster_by if self.cluster_by else False, + self.auto_refresh if self.auto_refresh else False, + } + ) From 971979e2ffcc649d0d8bd4475b446bbd252f884e Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Wed, 20 Sep 2023 16:20:32 -0500 Subject: [PATCH 07/53] change partition_by and cluster_by to FrozenSet, initial attempt at describe.sql --- dbt/adapters/bigquery/relation.py | 49 ++++++++++++++++++- .../bigquery/relation_configs/__init__.py | 3 ++ .../relation_configs/materialized_view.py | 13 +++-- .../relations/materialized_view/alter.sql | 16 ++++-- .../relations/materialized_view/describe.sql | 34 +++++++++++++ tests/unit/test_bigquery_adapter.py | 1 - 6 files changed, 103 insertions(+), 13 deletions(-) create mode 100644 dbt/include/bigquery/macros/relations/materialized_view/describe.sql diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 493de155e..03c882e64 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -2,9 +2,18 @@ from typing import Optional from itertools import chain, islice - +from dbt.context.providers import RuntimeConfigObject from dbt.adapters.base.relation import BaseRelation, ComponentName, InformationSchema -from dbt.adapters.bigquery.relation_configs import BigQueryIncludePolicy, BigQueryQuotePolicy +from dbt.adapters.relation_configs import RelationResults, RelationConfigChangeAction +from 
dbt.adapters.bigquery.relation_configs import ( + BigQueryIncludePolicy, + BigQueryQuotePolicy, + BigQueryMaterializedViewConfig, + BigQueryMaterializedViewConfigChangeset, + BigQueryAutoRefreshConfigChange, + BigQueryClusterConfigChange, + BigQueryPartitionConfigChange, +) from dbt.contracts.relation import RelationType from dbt.exceptions import CompilationError from dbt.utils import filter_null_values @@ -56,6 +65,42 @@ def project(self): def dataset(self): return self.schema + @classmethod + def materialized_view_config_changeset( + cls, relaation_results: RelationResults, runtime_config: RuntimeConfigObject + ) -> Optional[BigQueryMaterializedViewConfigChangeset]: + config_change_collection = BigQueryMaterializedViewConfigChangeset() + existing_materialized_view = BigQueryMaterializedViewConfig.from_relation_results( + relaation_results + ) + new_materialized_view = BigQueryMaterializedViewConfig.from_model_node( + runtime_config.model + ) + assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) + assert isinstance(new_materialized_view, BigQueryMaterializedViewConfig) + + if new_materialized_view.enable_refresh != existing_materialized_view.enable_refresh: + config_change_collection.auto_refresh = BigQueryAutoRefreshConfigChange( + action=RelationConfigChangeAction.alter, + context=new_materialized_view.enable_refresh, + ) + + if new_materialized_view.cluster_by != existing_materialized_view.cluster_by: + config_change_collection.cluster_by = BigQueryClusterConfigChange( + action=RelationConfigChangeAction.alter, + context=new_materialized_view.cluster_by, + ) + + if new_materialized_view.partition_by != existing_materialized_view.partition_by: + config_change_collection.partition_by = BigQueryPartitionConfigChange( + action=RelationConfigChangeAction.alter, + context=new_materialized_view.partition_by, + ) + + if config_change_collection: + return config_change_collection + return None + def information_schema(self, identifier: 
Optional[str] = None) -> "BigQueryInformationSchema": return BigQueryInformationSchema.from_relation(self, identifier) diff --git a/dbt/adapters/bigquery/relation_configs/__init__.py b/dbt/adapters/bigquery/relation_configs/__init__.py index 967befa6b..f3b8ae50b 100644 --- a/dbt/adapters/bigquery/relation_configs/__init__.py +++ b/dbt/adapters/bigquery/relation_configs/__init__.py @@ -1,6 +1,9 @@ from dbt.adapters.bigquery.relation_configs.materialized_view import ( BigQueryMaterializedViewConfig, BigQueryMaterializedViewConfigChangeset, + BigQueryAutoRefreshConfigChange, + BigQueryClusterConfigChange, + BigQueryPartitionConfigChange, ) from dbt.adapters.bigquery.relation_configs.policies import ( BigQueryIncludePolicy, diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index d5e489f14..1c5e622e4 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, FrozenSet, Optional, Union import agate from dbt.exceptions import DbtRuntimeError @@ -46,8 +46,8 @@ class BigQueryMaterializedViewConfig(BigQueryReleationConfigBase, RelationConfig materialized_view_name: str schema_name: str database_name: str - cluster_by: Optional[Union[List[str], str]] = None - partition_by: Optional[Dict[str, Any]] = None + cluster_by: Optional[Union[FrozenSet[List[str]], str]] = None + partition_by: Optional[FrozenSet[Dict[str, Any]]] = None partition_expiration_date: Optional[int] = None enable_refresh: Optional[bool] = True refresh_interval_minutes: Optional[int] = 30 @@ -70,6 +70,7 @@ def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": ), "cluster_by": config_dict.get("cluster_by"), "partition_by": config_dict.get("partition_by"), + "partition_expiration_date": 
config_dict.get("partition_expiration_date"), "enable_refresh": config_dict.get("enable_refresh"), "refresh_interval_minutes": config_dict.get("refresh_interval_minutes"), "hours_to_expiration": config_dict.get("hours_to_expiration"), @@ -134,6 +135,7 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict: "database_name": materialized_view.get("database"), "cluster_by": materialized_view.get("cluster_by"), "partition_by": materialized_view.get("partition_by"), + "partition_expiration_date": materialized_view.get("partition_expiration_date"), "enable_refresh": materialized_view.get("enabled_refresh"), "refresh_interval_minutes": materialized_view.get("refresh_interval_minutes"), "hours_to_expiration": materialized_view.get("hours_to_expiration"), @@ -158,7 +160,7 @@ def requires_full_refresh(self) -> bool: @dataclass(frozen=True, eq=True, unsafe_hash=True) class BigQueryPartitionConfigChange(RelationConfigChange): - context: Optional[bool] = None + context: Optional[FrozenSet[Dict[str, Any]]] = None @property def requires_full_refresh(self) -> bool: @@ -167,7 +169,7 @@ def requires_full_refresh(self) -> bool: @dataclass(frozen=True, eq=True, unsafe_hash=True) class BigQueryClusterConfigChange(RelationConfigChange): - context: Optional[bool] = None + context: Optional[Union[FrozenSet[List[str]], str]] = None @property def requires_full_refresh(self) -> bool: @@ -177,6 +179,7 @@ def requires_full_refresh(self) -> bool: @dataclass class BigQueryMaterializedViewConfigChangeset: partition_by: Optional[BigQueryPartitionConfigChange] = None + partition_expiration_days: Optional[BigQueryPartitionConfigChange] = None cluster_by: Optional[BigQueryClusterConfigChange] = None auto_refresh: Optional[BigQueryAutoRefreshConfigChange] = None diff --git a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql index 27819c948..8204c87ba 100644 --- 
a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql @@ -4,12 +4,18 @@ sql, existing_relation ) %} - bigquery__get_replace_materialized_view_as_sql( - relation, - sql - ) + + {% if configuration_changes.requires_full_refresh %} + + {{ bigquery__get_replace_materialized_view_as_sql(relation, sql) }} + + {% else %} + + {% endmacro %} {% macro bigquery__get_materialized_view_configuration_changes(existing_relation, new_config) %} - {{- return(None) -}} + {% set _existing_materialized_view = bigquery__describe_materialized_view(existing_relation) %} + {% set _configuration_changes = existing_relation.materialized_view_config_changeset(_existing_materialized_view, new_config) %} + {% do return(_configuration_changes) %} {% endmacro %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql new file mode 100644 index 000000000..f2a568441 --- /dev/null +++ b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql @@ -0,0 +1,34 @@ +{% macro bigquery__describe_materialized_view(relation) %} + {%- set _materialized_view_sql -%} + select + mv.table_name as materialized_view, + pt.table_name as partitioned_table, + pt.partitioning_type, + pt.partitioning_field_name, + topt.partition_expiration_days, + topt.table_name as table_options_table, + topt.description, + topt.enable_refresh, + topt.friendly_name, + topt.expiration_timestamp as hours_to_expiration, + topt.kms_key_name, + topt.labels, + topt.max_staleness, + topt.refresh_interval_minutes, + from + `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.MATERIALIZED_VIEWS` mv + left join + `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.PARTITIONS` pt + on + mv.table_name = pt.table_name + left join + `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.TABLE_OPTIONS` topt + on + 
mv.table_name = topt.table_name + where + mv.table_name = '{{ relation.name }}' + {%- endset %} + {% set _materialized_view = run_query(_materialized_view_sql) %} + + {% do return({'materialized_view': _materialized_viewy}) %} +{% endmacro %} diff --git a/tests/unit/test_bigquery_adapter.py b/tests/unit/test_bigquery_adapter.py index 10cb3f530..dc75d2c90 100644 --- a/tests/unit/test_bigquery_adapter.py +++ b/tests/unit/test_bigquery_adapter.py @@ -13,7 +13,6 @@ from unittest.mock import patch, MagicMock, Mock, create_autospec, ANY import dbt.dataclass_schema - from dbt.adapters.bigquery import PartitionConfig from dbt.adapters.bigquery import BigQueryCredentials from dbt.adapters.bigquery import BigQueryAdapter From 9e1d55914bc87cc4ee5785494f6aa665c5741d7c Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Wed, 20 Sep 2023 16:48:42 -0500 Subject: [PATCH 08/53] create utility.py to add bool_setting method, update parse_model_node to use new method --- .../relation_configs/materialized_view.py | 20 ++++-------------- dbt/adapters/bigquery/utility.py | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 dbt/adapters/bigquery/utility.py diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 1c5e622e4..178e96b4b 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -9,6 +9,7 @@ from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import ComponentName from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase +from dbt.adapters.bigquery.utility import bool_setting @dataclass(frozen=True, eq=True, unsafe_hash=True) @@ -102,22 +103,9 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: "labels": model_node.config.extra.get("labels"), } - autorefresh_value = model_node.config.extra.get("enable_refresh") - 
if autorefresh_value is not None: - if isinstance(autorefresh_value, bool): - config_dict["enable_refresh"] = autorefresh_value - elif isinstance(autorefresh_value, str): - lower_autorefresh_value = autorefresh_value.lower() - if lower_autorefresh_value == "true": - config_dict["enable_refresh"] = True - elif lower_autorefresh_value == "false": - config_dict["enable_refresh"] = False - else: - raise ValueError( - "Invalide enable_refresh representation. Please used excepted value ex.(True, 'true', 'True')" - ) - else: - raise TypeError("Invalid autorefresh value: expecting boolean or str.") + raw_autorefresh_value = model_node.config.extra.get("enable_refresh") + auto_refresh_value = bool_setting(raw_autorefresh_value) + config_dict["enable_refresh"] = auto_refresh_value return config_dict diff --git a/dbt/adapters/bigquery/utility.py b/dbt/adapters/bigquery/utility.py new file mode 100644 index 000000000..dd7c3bb8e --- /dev/null +++ b/dbt/adapters/bigquery/utility.py @@ -0,0 +1,21 @@ +from typing import Any, Optional + + +def bool_setting(value: Optional[Any] = None) -> Optional[bool]: + if value is None: + return None + elif isinstance(value, bool): + return value + elif isinstance(value, str): + if value.lower() in ["true", "false"]: + return bool(value) + else: + raise ValueError( + f"Invalid input, " + f"expecting bool or str ex. 
(True, False, 'true', 'False'), recieved: {value}" + ) + else: + raise TypeError( + f"Invalide type for bool evaluation, " + f"expecting bool or str, recieved: {type(value)}" + ) From 71c903d81d91750342e5f0a5b1907890aa7c9ed9 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Thu, 21 Sep 2023 11:25:57 -0500 Subject: [PATCH 09/53] update describe.sql query --- .../relations/materialized_view/describe.sql | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql index f2a568441..721667d1f 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql @@ -2,25 +2,18 @@ {%- set _materialized_view_sql -%} select mv.table_name as materialized_view, - pt.table_name as partitioned_table, - pt.partitioning_type, - pt.partitioning_field_name, - topt.partition_expiration_days, - topt.table_name as table_options_table, - topt.description, - topt.enable_refresh, - topt.friendly_name, - topt.expiration_timestamp as hours_to_expiration, - topt.kms_key_name, - topt.labels, - topt.max_staleness, - topt.refresh_interval_minutes, + c.column_name, + c.is_partitioning_column, + c.clustering_ordinal_position, + topt.option_name, + topt.option_value, + topt.option_type from `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.MATERIALIZED_VIEWS` mv left join - `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.PARTITIONS` pt + `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.COLUMNS` c on - mv.table_name = pt.table_name + mv.table_name = c.table_name left join `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.TABLE_OPTIONS` topt on From 377414c52c1beb4fbe51feb01039cfe59faa8922 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Thu, 21 Sep 2023 11:51:43 -0500 
Subject: [PATCH 10/53] update describe sql to be able to create list of cluster by field names --- .../relations/materialized_view/describe.sql | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql index 721667d1f..195bd63a2 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql @@ -1,5 +1,23 @@ {% macro bigquery__describe_materialized_view(relation) %} {%- set _materialized_view_sql -%} + -- checks each column to see if its a cluster_by field then adds it to a new list + with ClusteringColumns as ( + select + table_name, + ARRAY_AGG( + case + when clustering_ordinal_position is not null then column_name + else null + end + ignore nulls + ) as clustering_fields + from + `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.COLUMNS` + where + table_name = '{{ relation.name }}' + GROUP BY + table_name +) select mv.table_name as materialized_view, c.column_name, From d219af2049c1b89c4983ceb13e305e1906e493fa Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Fri, 22 Sep 2023 15:28:21 -0500 Subject: [PATCH 11/53] initial attempt at modifying get_alter_materialized_view_as_sql --- .../macros/relations/materialized_view/alter.sql | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql index 8204c87ba..dfb7d46d3 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql @@ -6,10 +6,18 @@ ) %} {% if configuration_changes.requires_full_refresh %} + {{ get_replace_sql(existing_relation, relation, sql) }} + {% else %} - {{
bigquery__get_replace_materialized_view_as_sql(relation, sql) }} + {%- set auto_refresh = configuration_changes.auto_refresh -%} + {%- if auto_refresh -%}{{- log('Applying UPDATE AUTOREFRESH to: ' ~ relation) -}}{%- endif -%} - {% else %} + alter materialized view {{ relation }} + set options ( + {% if auto_refresh %}enable_refresh = {{ auto_refresh.context }}{% endif %} + ) + + {%- endif %} {% endmacro %} From 6bedc5e1b2be523f64e5443dafe0ed892ac721df Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Tue, 26 Sep 2023 13:12:18 -0500 Subject: [PATCH 12/53] update to main and add space --- .../bigquery/macros/relations/materialized_view/alter.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql index dfb7d46d3..510b3371e 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql @@ -14,7 +14,7 @@ alter materialized view {{ relation }} set options ( - {% if auto_refresh %}enable_refresh = {{ auto_refresh.context }}{% endif %} + {% if auto_refresh %} enable_refresh = {{ auto_refresh.context }}{% endif %} ) {%- endif %} From da4d295da781d83d66c71b55aef32a835cdb94d9 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Wed, 27 Sep 2023 16:21:39 -0500 Subject: [PATCH 13/53] initial build out of mini classes for bigquery cluster, partition, auto_refresh in materialized_view dict. 
still need to fix some mypy issues --- dbt/adapters/bigquery/relation.py | 16 +- .../bigquery/relation_configs/auto_refresh.py | 80 ++++++++++ .../bigquery/relation_configs/cluster.py | 54 +++++++ .../relation_configs/materialized_view.py | 146 ++++++++---------- .../bigquery/relation_configs/partition.py | 68 ++++++++ 5 files changed, 272 insertions(+), 92 deletions(-) create mode 100644 dbt/adapters/bigquery/relation_configs/auto_refresh.py create mode 100644 dbt/adapters/bigquery/relation_configs/cluster.py create mode 100644 dbt/adapters/bigquery/relation_configs/partition.py diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 03c882e64..9fa7145ab 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -79,22 +79,22 @@ def materialized_view_config_changeset( assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) assert isinstance(new_materialized_view, BigQueryMaterializedViewConfig) - if new_materialized_view.enable_refresh != existing_materialized_view.enable_refresh: + if new_materialized_view.auto_refresh != existing_materialized_view.auto_refresh: config_change_collection.auto_refresh = BigQueryAutoRefreshConfigChange( action=RelationConfigChangeAction.alter, - context=new_materialized_view.enable_refresh, + context=new_materialized_view.auto_refresh, ) - if new_materialized_view.cluster_by != existing_materialized_view.cluster_by: - config_change_collection.cluster_by = BigQueryClusterConfigChange( + if new_materialized_view.cluster != existing_materialized_view.cluster: + config_change_collection.cluster = BigQueryClusterConfigChange( action=RelationConfigChangeAction.alter, - context=new_materialized_view.cluster_by, + context=new_materialized_view.cluster, ) - if new_materialized_view.partition_by != existing_materialized_view.partition_by: - config_change_collection.partition_by = BigQueryPartitionConfigChange( + if new_materialized_view.partition != 
existing_materialized_view.partition: + config_change_collection.partition = BigQueryPartitionConfigChange( action=RelationConfigChangeAction.alter, - context=new_materialized_view.partition_by, + context=new_materialized_view.partition, ) if config_change_collection: diff --git a/dbt/adapters/bigquery/relation_configs/auto_refresh.py b/dbt/adapters/bigquery/relation_configs/auto_refresh.py new file mode 100644 index 000000000..85c150195 --- /dev/null +++ b/dbt/adapters/bigquery/relation_configs/auto_refresh.py @@ -0,0 +1,80 @@ +from dataclasses import dataclass +from typing import Optional + +import agate +from dbt.adapters.relation_configs import ( + RelationConfigChange, + RelationConfigValidationMixin, +) +from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase +from dbt.contracts.graph.nodes import ModelNode +from dbt.adapters.bigquery.utility import bool_setting + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryAutoRefreshConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): + """ + This config dictionary is comprised of three table options all centered around auto_refresh + - enable_refresh: Enables autoamtic refresh of materialized view when base table is + updated. + - refresh_interval_minutes: frequency at which a materialized view will be refeshed. + - Note: (default is 30 minutes) + - max_staleness: if the last refresh is within max_staleness interval, + BigQuery returns data directly from the materialized view without reading base table. + Otherwise it reads from the base to return results withing the staleness interval. 
+ """ + + enable_refresh: Optional[bool] = True + refresh_interval_minutes: Optional[int] = 30 + max_staleness: Optional[str] = None + + @classmethod + def from_dict(cls, config_dict) -> "BigQueryAutoRefreshConfig": + kwargs_dict = { + "enable_refresh": config_dict.get("enabled_refresh"), + "refresh_interval_minutes": config_dict.get("refresh_interval_minutes"), + "max_staleness": config_dict.get("max_staleness"), + } + auto_refresh: "BigQueryAutoRefreshConfig" = super().from_dict(kwargs_dict) # type: ignore + return auto_refresh + + @classmethod + def parse_model_node(cls, model_node: ModelNode) -> dict: + config_dict = {} + raw_autorefresh_value = model_node.config.extra.get("enable_refresh") + + if raw_autorefresh_value: + auto_refresh_value = bool_setting(raw_autorefresh_value) + config_dict.update({"enable_refersh": auto_refresh_value}) + + if refresh_interval_minutes := model_node.config.extra.get("refresh_interval_minutes"): + config_dict.update({"refresh_interval_minutes": refresh_interval_minutes}) + + if max_staleness := model_node.config.extra.get("max_staleness"): + config_dict.update({"max_staleness": max_staleness}) + + return config_dict + + @classmethod + def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: + config_dict = {} + if enable_refresh := relation_results_entry.get("enable_refresh"): + auto_refresh_value = bool_setting(enable_refresh) + config_dict.update({"enable_refresh": auto_refresh_value}) + + if refresh_interval_minutes := relation_results_entry.get("refresh_interval_minutes"): + config_dict.update({"refresh_interval_minutes": refresh_interval_minutes}) + + if max_staleness := relation_results_entry.get("max_staleness"): + config_dict.update({"max_staleness": max_staleness}) + + return config_dict + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryAutoRefreshConfigChange(RelationConfigChange): + context: BigQueryAutoRefreshConfig + + @property + def requires_full_refresh(self) -> bool: + 
return False diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/cluster.py new file mode 100644 index 000000000..1c35996a2 --- /dev/null +++ b/dbt/adapters/bigquery/relation_configs/cluster.py @@ -0,0 +1,54 @@ +from dataclasses import dataclass +from typing import List, FrozenSet, Optional, Union + +import agate +from dbt.adapters.relation_configs.config_change import RelationConfigChange +from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin +from dbt.contracts.graph.nodes import ModelNode + +from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryClusterConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): + """ + - cluster_by: A comma-seperated list of of col references to determine cluster. + - Note: Can contain up to four colms in list. + """ + + cluster_by: Optional[Union[FrozenSet[List[str]], str]] = None + + @classmethod + def from_dict(cls, config_dict) -> "BigQueryClusterConfig": + kwargs_dict = { + "cluster_by": config_dict.get("cluster_by"), + } + cluster: "BigQueryClusterConfig" = super().from_dict(kwargs_dict) # type: ignore + return cluster + + @classmethod + def parse_model_node(cls, model_node: ModelNode) -> dict: + config_dict = {} + + if cluster_by := model_node.config.extra.get("cluster_by"): + config_dict.update({"cluster_by": cluster_by}) + + return config_dict + + @classmethod + def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: + config_dict = {} + + if cluster_by := relation_results_entry.config.extra.get("cluster_by"): + config_dict.update({"cluster_by": cluster_by}) + + return config_dict + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryClusterConfigChange(RelationConfigChange): + context: BigQueryClusterConfig + + @property + def requires_full_refresh(self) -> bool: + return True diff --git 
a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 178e96b4b..2e375481d 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,15 +1,25 @@ from dataclasses import dataclass -from typing import Any, Dict, List, FrozenSet, Optional, Union +from typing import Dict, Optional import agate from dbt.exceptions import DbtRuntimeError -from dbt.adapters.relation_configs.config_change import RelationConfigChange from dbt.adapters.relation_configs.config_base import RelationResults from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import ComponentName from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase -from dbt.adapters.bigquery.utility import bool_setting +from dbt.adapters.bigquery.relation_configs.auto_refresh import ( + BigQueryAutoRefreshConfig, + BigQueryAutoRefreshConfigChange, +) +from dbt.adapters.bigquery.relation_configs.partition import ( + BigQueryPartitionConfig, + BigQueryPartitionConfigChange, +) +from dbt.adapters.bigquery.relation_configs.cluster import ( + BigQueryClusterConfig, + BigQueryClusterConfigChange, +) @dataclass(frozen=True, eq=True, unsafe_hash=True) @@ -22,22 +32,9 @@ class BigQueryMaterializedViewConfig(BigQueryReleationConfigBase, RelationConfig - materialized_view_name: Name of the materialized view - schema: Dataset name of the materialized view - database: Project name of the database - - cluster_by: A comma-seperated list of of col references to determine cluster. - - Note: Can contain up to four colms in list. - - partition_by: Expression to describe how to partition materialized view. - - Note: Must be partitioned in the same was as base table is partitioned. 
- - enable_refresh: Enables autoamtic refresh of materialized view when base table is - updated. - - refresh_interval_minutes: frequency at which a materialized view will be refeshed. - - Note: (default is 30 minutes) - hours_to_expiration: The time when table expires. - Note: If not set table persists - - max_staleness: if the last refresh is within max_staleness interval, - BigQuery returns data directly from the materialized view without reading base table. - Otherwise it reads from the base to return results withing the staleness interval. - kms_key_name: user defined Cloud KMS encryption key. - - friendly_name: A descriptive name for this table. - - description: A user-friendly description of this table. - labels: used to organized and group table - Note on usage can be found @@ -47,16 +44,11 @@ class BigQueryMaterializedViewConfig(BigQueryReleationConfigBase, RelationConfig materialized_view_name: str schema_name: str database_name: str - cluster_by: Optional[Union[FrozenSet[List[str]], str]] = None - partition_by: Optional[FrozenSet[Dict[str, Any]]] = None - partition_expiration_date: Optional[int] = None - enable_refresh: Optional[bool] = True - refresh_interval_minutes: Optional[int] = 30 + cluster: BigQueryClusterConfig = BigQueryClusterConfig() + partition: BigQueryPartitionConfig = BigQueryPartitionConfig() + auto_refresh: BigQueryAutoRefreshConfig = BigQueryAutoRefreshConfig() hours_to_expiration: Optional[int] = None - max_staleness: Optional[str] = None kms_key_name: Optional[str] = None - friendly_name: Optional[str] = None - description: Optional[str] = None labels: Optional[Dict[str, str]] = None @classmethod @@ -69,19 +61,20 @@ def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": "database_name": cls._render_part( ComponentName.Database, config_dict.get("database_name") ), - "cluster_by": config_dict.get("cluster_by"), - "partition_by": config_dict.get("partition_by"), - "partition_expiration_date": 
config_dict.get("partition_expiration_date"), - "enable_refresh": config_dict.get("enable_refresh"), - "refresh_interval_minutes": config_dict.get("refresh_interval_minutes"), "hours_to_expiration": config_dict.get("hours_to_expiration"), - "max_staleness": config_dict.get("max_staleness"), "kms_key_name": config_dict.get("kms_key_name"), - "friendly_name": config_dict.get("friendly_name"), - "description": config_dict.get("description"), "labels": config_dict.get("labels"), } + if auto_refresh := config_dict.get("auto_refresh"): + kwargs_dict.update({"auto_refresh": BigQueryAutoRefreshConfig.from_dict(auto_refresh)}) + + if partition := config_dict.get("partition"): + kwargs_dict.update({"partition": BigQueryPartitionConfig.from_dict(partition)}) + + if cluster := config_dict.get("cluster"): + kwargs_dict.update({"cluster": BigQueryClusterConfig.from_dict(cluster)}) + materialized_view: "BigQueryMaterializedViewConfig" = super().from_dict(kwargs_dict) # type: ignore return materialized_view @@ -91,21 +84,21 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: "materialized_view_name": model_node.identifier, "schema_name": model_node.schema, "database_name": model_node.database, - "cluster_by": model_node.config.extra.get("cluster_by"), - "partition_by": model_node.config.extra.get("partition_by"), - "partition_expiration_date": model_node.config.extra.get("partition_expiration_date"), - "refresh_interval_minutes": model_node.config.extra.get("refresh_interval_minutes"), "hours_to_expiration": model_node.config.extra.get("hours_to_expiration"), - "max_staleness": model_node.config.extra.get("max_staleness"), "kms_key_name": model_node.config.extra.get("kms_key_name"), - "friendly_name": model_node.config.extra.get("friendly_name"), - "description": model_node.config.extra.get("description"), "labels": model_node.config.extra.get("labels"), } - raw_autorefresh_value = model_node.config.extra.get("enable_refresh") - auto_refresh_value = 
bool_setting(raw_autorefresh_value) - config_dict["enable_refresh"] = auto_refresh_value + if model_node.config.get("auto_refresh"): + config_dict.update( + {"auto_refresh": BigQueryAutoRefreshConfig.parse_model_node(model_node)} + ) + + if model_node.config.get("partition"): + config_dict.update({"partition": BigQueryPartitionConfig.parse_model_node(model_node)}) + + if model_node.config.get("cluster"): + config_dict.update({"cluster": BigQueryClusterConfig.parse_model_node(model_node)}) return config_dict @@ -121,63 +114,48 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict: "materialized_view_name": materialized_view.get("materialized_view_name"), "schema_name": materialized_view.get("schema"), "database_name": materialized_view.get("database"), - "cluster_by": materialized_view.get("cluster_by"), - "partition_by": materialized_view.get("partition_by"), - "partition_expiration_date": materialized_view.get("partition_expiration_date"), - "enable_refresh": materialized_view.get("enabled_refresh"), - "refresh_interval_minutes": materialized_view.get("refresh_interval_minutes"), "hours_to_expiration": materialized_view.get("hours_to_expiration"), - "max_staleness": materialized_view.get("max_staleness"), "kms_key_name": materialized_view.get("kms_key_name"), - "friendly_name": materialized_view.get("friendly_name"), - "description": materialized_view.get("description"), "labels": materialized_view.get("labels"), } - return config_dict - - -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryAutoRefreshConfigChange(RelationConfigChange): - context: Optional[bool] = None - - @property - def requires_full_refresh(self) -> bool: - return False - + if materialized_view.get("auto_refresh"): + config_dict.update( + { + "auto_refresh": BigQueryAutoRefreshConfig.parse_relation_results( + materialized_view + ) + } + ) + + if materialized_view.get("partition"): + config_dict.update( + {"partition": 
BigQueryPartitionConfig.parse_relation_results(materialized_view)} + ) + + if materialized_view.get("cluster"): + config_dict.update( + {"cluster": BigQueryClusterConfig.parse_relation_results(materialized_view)} + ) -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryPartitionConfigChange(RelationConfigChange): - context: Optional[FrozenSet[Dict[str, Any]]] = None - - @property - def requires_full_refresh(self) -> bool: - return True - - -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryClusterConfigChange(RelationConfigChange): - context: Optional[Union[FrozenSet[List[str]], str]] = None - - @property - def requires_full_refresh(self) -> bool: - return True + return config_dict @dataclass class BigQueryMaterializedViewConfigChangeset: - partition_by: Optional[BigQueryPartitionConfigChange] = None - partition_expiration_days: Optional[BigQueryPartitionConfigChange] = None - cluster_by: Optional[BigQueryClusterConfigChange] = None + partition: Optional[BigQueryPartitionConfigChange] = None + cluster: Optional[BigQueryClusterConfigChange] = None auto_refresh: Optional[BigQueryAutoRefreshConfigChange] = None + kms_key_name: Optional[str] = None + labels: Optional[Dict[str, str]] = None @property def requires_full_refresh(self) -> bool: return any( { self.auto_refresh.requires_full_refresh if self.auto_refresh else False, - self.partition_by.requires_full_refresh if self.partition_by else False, - self.cluster_by.requires_full_refresh if self.cluster_by else False, + self.partition.requires_full_refresh if self.partition else False, + self.cluster.requires_full_refresh if self.cluster else False, } ) @@ -185,8 +163,8 @@ def requires_full_refresh(self) -> bool: def has_changes(self) -> bool: return any( { - self.partition_by if self.partition_by else False, - self.cluster_by if self.cluster_by else False, + self.partition if self.partition else False, + self.cluster if self.cluster else False, self.auto_refresh if self.auto_refresh else 
False, } ) diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py new file mode 100644 index 000000000..ee51fd203 --- /dev/null +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -0,0 +1,68 @@ +from dataclasses import dataclass +from typing import Any, Dict, FrozenSet, Optional + +import agate + +from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase +from dbt.adapters.relation_configs.config_change import RelationConfigChange +from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin +from dbt.contracts.graph.nodes import ModelNode + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryPartitionConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): + """ + This config dictionary is comprised of 2table options all centered around partitioning + - partition_by: Expression to describe how to partition materialized view. + - Note: Must be partitioned in the same was as base table is partitioned. 
+ - partition_expiration_days: The default lifetime, in days, of all partitions in a + partitioned table + """ + + partition_by: Optional[FrozenSet[Dict[str, Any]]] = None + partition_expiration_days: Optional[int] = None + + @classmethod + def from_dict(cls, config_dict) -> "BigQueryPartitionConfig": + kwargs_dict = { + "partition_by": config_dict.get("partition_by"), + "partition_expiration_days": config_dict.geet("partition_expiration_days"), + } + + partition: "BigQueryPartitionConfig" = super().from_dict(kwargs_dict) # type: ignore + return partition + + @classmethod + def parse_model_node(cls, model_node: ModelNode) -> dict: + config_dict = {} + + if partition_by := model_node.config.extra.get("partition_by"): + config_dict.update({"partition_by": partition_by}) + + if partition_expiration_days := model_node.config.extra.get("partition_expiration_days"): + config_dict.update({"partition_expiration_days": partition_expiration_days}) + + return config_dict + + @classmethod + def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: + config_dict = {} + + if partition_by := relation_results_entry.config.extra.get("partition_by"): + config_dict.update({"partition_by": partition_by}) + + if partition_expiration_days := relation_results_entry.config.extra.get( + "partition_expiration_days" + ): + config_dict.update({"partition_expiration_days": partition_expiration_days}) + + return config_dict + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryPartitionConfigChange(RelationConfigChange): + context: BigQueryPartitionConfig + + @property + def requires_full_refresh(self) -> bool: + return True From 0eba24681ea2e29a6f4fab6da9fd9f62bc1c7ab4 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 18:04:02 -0400 Subject: [PATCH 14/53] remove local package (dbt-bigquery) on `make dev-uninstall` --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index c75b0c6a9..fc6536f98 100644 --- a/Makefile 
+++ b/Makefile @@ -9,6 +9,7 @@ dev: ## Installs adapter in develop mode along with development dependencies dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment ## Useful when updating versions, or if you accidentally installed into the system interpreter pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y + pip uninstall -y dbt-bigquery .PHONY: ubuntu-py311 ubuntu-py311: ## Builds and runs an Ubuntu Python 3.11 development container From c56076e7914e8e8ffb965f6d026f3aa099da9acc Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 18:07:17 -0400 Subject: [PATCH 15/53] update changelog entry to encompass all features in this branch --- .changes/unreleased/Features-20230913-130445.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changes/unreleased/Features-20230913-130445.yaml b/.changes/unreleased/Features-20230913-130445.yaml index 65486fc26..42311fd92 100644 --- a/.changes/unreleased/Features-20230913-130445.yaml +++ b/.changes/unreleased/Features-20230913-130445.yaml @@ -1,5 +1,5 @@ kind: Features -body: provide logic to be able to alter, or reacut to chnges to trigger autorefresh +body: "Support change monitoring for materialized views, including: autorefresh, clustering, partitioning" time: 2023-09-13T13:04:45.761294-05:00 custom: Author: McKnight-42 From 4c2904ea63bd9389769f10dbe2d93e3962a4c2aa Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 18:14:35 -0400 Subject: [PATCH 16/53] remove alteration to setup/teardown for materialized view materialization --- .../macros/materializations/materialized_view.sql | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 dbt/include/bigquery/macros/materializations/materialized_view.sql diff --git a/dbt/include/bigquery/macros/materializations/materialized_view.sql b/dbt/include/bigquery/macros/materializations/materialized_view.sql deleted file mode 100644 index 8b4168aec..000000000 --- 
a/dbt/include/bigquery/macros/materializations/materialized_view.sql +++ /dev/null @@ -1,8 +0,0 @@ -{% macro materialized_view_setup(backup_relation, intermediate_relation, pre_hooks) %} - {{ run_hooks(pre_hooks, inside_transaction=False) }} -{% endmacro %} - - -{% macro materialized_view_teardown(backup_relation, intermediate_relation, post_hooks) %} - {{ run_hooks(post_hooks, inside_transaction=False) }} -{% endmacro %} From 8f722fe6e21260d3f989c546aaa0b4853fc3c685 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 18:22:07 -0400 Subject: [PATCH 17/53] fix spelling error, prepend underscore on base class module to mark as private to its package --- dbt/adapters/bigquery/relation_configs/{base.py => _base.py} | 2 +- dbt/adapters/bigquery/relation_configs/materialized_view.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename dbt/adapters/bigquery/relation_configs/{base.py => _base.py} (97%) diff --git a/dbt/adapters/bigquery/relation_configs/base.py b/dbt/adapters/bigquery/relation_configs/_base.py similarity index 97% rename from dbt/adapters/bigquery/relation_configs/base.py rename to dbt/adapters/bigquery/relation_configs/_base.py index 9cf44345d..37f9423e9 100644 --- a/dbt/adapters/bigquery/relation_configs/base.py +++ b/dbt/adapters/bigquery/relation_configs/_base.py @@ -13,7 +13,7 @@ @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryReleationConfigBase(RelationConfigBase): +class BigQueryRelationConfigBase(RelationConfigBase): @classmethod def include_policy(cls) -> Policy: return BigQueryIncludePolicy() diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 2e375481d..a0741bd3b 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -7,7 +7,7 @@ from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin from 
dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import ComponentName -from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase +from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase from dbt.adapters.bigquery.relation_configs.auto_refresh import ( BigQueryAutoRefreshConfig, BigQueryAutoRefreshConfigChange, @@ -23,7 +23,7 @@ @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryMaterializedViewConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): +class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase, RelationConfigValidationMixin): """ This config follow the specs found here: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_materialized_view_statement From fe9aa657593e84c2aedc2c5076b01b552e57c502 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 18:26:10 -0400 Subject: [PATCH 18/53] update call to relation to include quote and include policies, update case to match convention --- .../bigquery/macros/relations/materialized_view/refresh.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql b/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql index 54ad76e1e..8daae92ed 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql @@ -1,3 +1,3 @@ {% macro bigquery__refresh_materialized_view(relation) %} - CALL BQ.REFRESH_MATERIALIZED_VIEW('{{ relation.database }}.{{ relation.schema }}.{{ relation.identifier }}'); + call bq.refresh_materialized_view('{{ relation }}'); {% endmacro %} From 47b787d270785361608e65f1017f895a311ec804 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 19:03:43 -0400 Subject: [PATCH 19/53] update create statement to include partition, cluster, and options clauses 
--- .../relations/materialized_view/create.sql | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/create.sql b/dbt/include/bigquery/macros/relations/materialized_view/create.sql index 198a3a04a..19be74374 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/create.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/create.sql @@ -1,3 +1,17 @@ {% macro bigquery__get_create_materialized_view_as_sql(relation, sql) %} - create materialized view if not exists {{ relation }} as {{ sql }} + + {%- set partition_config_raw = config.get('partition_by', none) -%} + {%- set partition_config = adapter.parse_partition_by(partition_config_raw) -%} + {%- if partition_config.time_ingestion_partitioning -%} + {% do exceptions.raise_compiler_error("Time ingestion partitioning is not supported for materialized views") %} + {%- endif -%} + + {%- set cluster_config = config.get('cluster_by', none) -%} + + create materialized view if not exists {{ relation }} + {{ partition_by(partition_config) }} + {{ cluster_by(cluster_config) }} + {{ bigquery_options(adapter.materialized_view_options(config, model)) }} + as {{ sql }} + {% endmacro %} From 8398d658eeb29fb689136e03126b3492e2b8681c Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 20:48:04 -0400 Subject: [PATCH 20/53] update partition config to align with existing dbt-bigquery table config --- .../bigquery/relation_configs/partition.py | 103 ++++++++++++------ 1 file changed, 72 insertions(+), 31 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index ee51fd203..231c63166 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -1,60 +1,101 @@ +from collections import namedtuple from dataclasses import dataclass -from typing import Any, Dict, FrozenSet, 
Optional +from typing import Any, Dict, Optional import agate -from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase -from dbt.adapters.relation_configs.config_change import RelationConfigChange -from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin +from dbt.adapters.relation_configs import RelationConfigChange, RelationResults from dbt.contracts.graph.nodes import ModelNode +from dbt.dataclass_schema import StrEnum + +from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase + + +class PartitionDataType(StrEnum): + TIMESTAMP = "timestamp" + DATE = "date" + DATETIME = "datetime" + INT64 = "int64" + + +class PartitionGranularity(StrEnum): + HOUR = "hour" + DAY = "day" + MONTH = "month" + YEAR = "year" + + +PartitionRange = namedtuple("PartitionRange", ["start", "end", "interval"]) @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryPartitionConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): +class BigQueryPartitionConfig(BigQueryRelationConfigBase): """ - This config dictionary is comprised of 2table options all centered around partitioning - - partition_by: Expression to describe how to partition materialized view. - - Note: Must be partitioned in the same was as base table is partitioned. - - partition_expiration_days: The default lifetime, in days, of all partitions in a - partitioned table + This config manages table options supporting partitioning. 
See the following for more information: + - https://docs.getdbt.com/reference/resource-configs/bigquery-configs#using-table-partitioning-and-clustering + - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#partition_expression + + - field: field to partition on + - Note: Must be partitioned in the same way as base table + - data_type: data type of `field` + - granularity: size of the buckets for non-int64 `data_type` + - range: size of the buckets for int64 `data_type` + - time_ingestion_partitioning: supports partitioning by row creation time """ - partition_by: Optional[FrozenSet[Dict[str, Any]]] = None - partition_expiration_days: Optional[int] = None + field: str + data_type: PartitionDataType + granularity: Optional[PartitionGranularity] = None + range: Optional[PartitionRange] = None + time_ingestion_partitioning: Optional[bool] = False @classmethod - def from_dict(cls, config_dict) -> "BigQueryPartitionConfig": + def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryPartitionConfig": + # required kwargs_dict = { - "partition_by": config_dict.get("partition_by"), - "partition_expiration_days": config_dict.geet("partition_expiration_days"), + "field": config_dict.get("field"), + "data_type": config_dict.get("data_type"), } + # optional + if granularity := config_dict.get("granularity"): + config_dict.update({"granularity": granularity}) + if partition_range := config_dict.get("range"): + config_dict.update({"range": PartitionRange(**partition_range)}) + partition: "BigQueryPartitionConfig" = super().from_dict(kwargs_dict) # type: ignore return partition @classmethod - def parse_model_node(cls, model_node: ModelNode) -> dict: - config_dict = {} - - if partition_by := model_node.config.extra.get("partition_by"): - config_dict.update({"partition_by": partition_by}) - - if partition_expiration_days := model_node.config.extra.get("partition_expiration_days"): - config_dict.update({"partition_expiration_days": 
partition_expiration_days}) + def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: + partition_by = model_node.config.extra.get("partition_by", {}) + + config_dict = { + "field": partition_by.get("field"), + "data_type": partition_by.get("data_type"), + "granularity": partition_by.get("granularity"), + "range": partition_by.get("range", {}), + } return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: - config_dict = {} + def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: + relation_results_entry: agate.Row = cls._get_first_row(relation_results["relation"]) # type: ignore - if partition_by := relation_results_entry.config.extra.get("partition_by"): - config_dict.update({"partition_by": partition_by}) + config_dict = { + "field": relation_results_entry.get("field"), + "data_type": relation_results_entry.get("data_type"), + "granularity": relation_results_entry.get("granularity"), + } - if partition_expiration_days := relation_results_entry.config.extra.get( - "partition_expiration_days" - ): - config_dict.update({"partition_expiration_days": partition_expiration_days}) + # combine range fields into dictionary, like the model config + range_dict = { + "start": relation_results_entry.get("partition_start"), + "end": relation_results_entry.get("partition_end"), + "interval": relation_results_entry.get("partition_interval"), + } + config_dict.update({"range": range_dict}) return config_dict From 2d744f28281c7a15a8a15283bd66c4cb39076df9 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 20:48:22 -0400 Subject: [PATCH 21/53] update cluster config to align with existing dbt-bigquery table config --- .../bigquery/relation_configs/cluster.py | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/cluster.py index 1c35996a2..04d2f26a7 
100644 --- a/dbt/adapters/bigquery/relation_configs/cluster.py +++ b/dbt/adapters/bigquery/relation_configs/cluster.py @@ -1,46 +1,50 @@ from dataclasses import dataclass -from typing import List, FrozenSet, Optional, Union +from typing import Any, Dict, FrozenSet import agate -from dbt.adapters.relation_configs.config_change import RelationConfigChange -from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin +from dbt.adapters.relation_configs import RelationConfigChange, RelationResults from dbt.contracts.graph.nodes import ModelNode -from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase +from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryClusterConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): +class BigQueryClusterConfig(BigQueryRelationConfigBase): """ - - cluster_by: A comma-seperated list of of col references to determine cluster. - - Note: Can contain up to four colms in list. + This config manages table options supporting clustering. 
See the following for more information: + - https://docs.getdbt.com/reference/resource-configs/bigquery-configs#using-table-partitioning-and-clustering + - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#clustering_column_list + + - fields: set of columns to cluster on + - Note: can contain up to four columns """ - cluster_by: Optional[Union[FrozenSet[List[str]], str]] = None + fields: FrozenSet[str] @classmethod - def from_dict(cls, config_dict) -> "BigQueryClusterConfig": - kwargs_dict = { - "cluster_by": config_dict.get("cluster_by"), - } + def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryClusterConfig": + kwargs_dict = {"fields": config_dict.get("fields")} cluster: "BigQueryClusterConfig" = super().from_dict(kwargs_dict) # type: ignore return cluster @classmethod - def parse_model_node(cls, model_node: ModelNode) -> dict: + def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = {} if cluster_by := model_node.config.extra.get("cluster_by"): - config_dict.update({"cluster_by": cluster_by}) + # users may input a single field as a string + if isinstance(cluster_by, str): + cluster_by = [cluster_by] + config_dict.update({"fields": frozenset(cluster_by)}) return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: - config_dict = {} + def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: + relation_results_entry: agate.Row = cls._get_first_row(relation_results["relation"]) # type: ignore - if cluster_by := relation_results_entry.config.extra.get("cluster_by"): - config_dict.update({"cluster_by": cluster_by}) + field_list = relation_results_entry.get("cluster_by", "") + config_dict = {"fields": frozenset(field_list.split(","))} return config_dict From f75a40674b284f4dba14462d557088ff4375e0fa Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 21:12:23 -0400 Subject: [PATCH 22/53] update auto 
refresh config to align with other configs --- .../bigquery/relation_configs/auto_refresh.py | 73 ++++++++++--------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/auto_refresh.py b/dbt/adapters/bigquery/relation_configs/auto_refresh.py index 85c150195..6829ab83b 100644 --- a/dbt/adapters/bigquery/relation_configs/auto_refresh.py +++ b/dbt/adapters/bigquery/relation_configs/auto_refresh.py @@ -1,27 +1,26 @@ from dataclasses import dataclass -from typing import Optional +from typing import Any, Dict, Optional import agate -from dbt.adapters.relation_configs import ( - RelationConfigChange, - RelationConfigValidationMixin, -) -from dbt.adapters.bigquery.relation_configs.base import BigQueryReleationConfigBase +from dbt.adapters.relation_configs import RelationConfigChange, RelationResults from dbt.contracts.graph.nodes import ModelNode + +from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase from dbt.adapters.bigquery.utility import bool_setting @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryAutoRefreshConfig(BigQueryReleationConfigBase, RelationConfigValidationMixin): +class BigQueryAutoRefreshConfig(BigQueryRelationConfigBase): """ - This config dictionary is comprised of three table options all centered around auto_refresh - - enable_refresh: Enables autoamtic refresh of materialized view when base table is - updated. - - refresh_interval_minutes: frequency at which a materialized view will be refeshed. - - Note: (default is 30 minutes) - - max_staleness: if the last refresh is within max_staleness interval, - BigQuery returns data directly from the materialized view without reading base table. - Otherwise it reads from the base to return results withing the staleness interval. + This config manages materialized view options supporting automatic refresh. 
See the following for more information: + - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#materialized_view_option_list + - https://cloud.google.com/bigquery/docs/materialized-views-create#manage_staleness_and_refresh_frequency + + - enable_refresh: enables automatic refresh based on `refresh_interval_minutes` + - refresh_interval_minutes: frequency at which a materialized view will be refreshed + - max_staleness: if the last refresh is within the max_staleness interval, + BigQuery returns data directly from the materialized view (faster/cheaper) without reading the base table, + otherwise it reads from the base table (slower/more expensive) to meet the staleness requirement """ enable_refresh: Optional[bool] = True @@ -29,23 +28,28 @@ class BigQueryAutoRefreshConfig(BigQueryReleationConfigBase, RelationConfigValid max_staleness: Optional[str] = None @classmethod - def from_dict(cls, config_dict) -> "BigQueryAutoRefreshConfig": - kwargs_dict = { - "enable_refresh": config_dict.get("enabled_refresh"), - "refresh_interval_minutes": config_dict.get("refresh_interval_minutes"), - "max_staleness": config_dict.get("max_staleness"), - } + def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryAutoRefreshConfig": + kwargs_dict = {} + + # optional + if "enable_refresh" in config_dict: # boolean + kwargs_dict.update({"enable_refresh": config_dict.get("enable_refresh")}) + if refresh_interval_minutes := config_dict.get("refresh_interval_minutes"): + kwargs_dict.update({"refresh_interval_minutes": refresh_interval_minutes}) + if max_staleness := config_dict.get("max_staleness"): + kwargs_dict.update({"max_staleness": max_staleness}) + auto_refresh: "BigQueryAutoRefreshConfig" = super().from_dict(kwargs_dict) # type: ignore return auto_refresh @classmethod - def parse_model_node(cls, model_node: ModelNode) -> dict: + def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = {} - raw_autorefresh_value = 
model_node.config.extra.get("enable_refresh") - if raw_autorefresh_value: - auto_refresh_value = bool_setting(raw_autorefresh_value) - config_dict.update({"enable_refersh": auto_refresh_value}) + # check for the key since this is a boolean + if "enable_refresh" in model_node.config.extra: + enable_refresh = model_node.config.extra.get("enable_refresh") + config_dict.update({"enable_refresh": bool_setting(enable_refresh)}) if refresh_interval_minutes := model_node.config.extra.get("refresh_interval_minutes"): config_dict.update({"refresh_interval_minutes": refresh_interval_minutes}) @@ -56,17 +60,14 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: - config_dict = {} - if enable_refresh := relation_results_entry.get("enable_refresh"): - auto_refresh_value = bool_setting(enable_refresh) - config_dict.update({"enable_refresh": auto_refresh_value}) + def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: + relation_results_entry: agate.Row = cls._get_first_row(relation_results.get("relation")) # type: ignore - if refresh_interval_minutes := relation_results_entry.get("refresh_interval_minutes"): - config_dict.update({"refresh_interval_minutes": refresh_interval_minutes}) - - if max_staleness := relation_results_entry.get("max_staleness"): - config_dict.update({"max_staleness": max_staleness}) + config_dict = { + "enable_refresh": bool_setting(relation_results_entry.get("enable_refresh")), + "refresh_interval_minutes": relation_results_entry.get("refresh_interval_minutes"), + "max_staleness": relation_results_entry.get("max_staleness"), + } return config_dict From ae42de01d0359c46d2e6a895e25ba0e91a343e22 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 21:28:18 -0400 Subject: [PATCH 23/53] revert parse results to accept an agate Row --- .../bigquery/relation_configs/auto_refresh.py | 7 ++----- 
dbt/adapters/bigquery/relation_configs/cluster.py | 7 ++----- dbt/adapters/bigquery/relation_configs/partition.py | 12 +++++------- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/auto_refresh.py b/dbt/adapters/bigquery/relation_configs/auto_refresh.py index 6829ab83b..05b248787 100644 --- a/dbt/adapters/bigquery/relation_configs/auto_refresh.py +++ b/dbt/adapters/bigquery/relation_configs/auto_refresh.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional import agate -from dbt.adapters.relation_configs import RelationConfigChange, RelationResults +from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase @@ -60,15 +60,12 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - relation_results_entry: agate.Row = cls._get_first_row(relation_results.get("relation")) # type: ignore - + def parse_relation_results(cls, relation_results_entry: agate.Row) -> Dict[str, Any]: # type: ignore config_dict = { "enable_refresh": bool_setting(relation_results_entry.get("enable_refresh")), "refresh_interval_minutes": relation_results_entry.get("refresh_interval_minutes"), "max_staleness": relation_results_entry.get("max_staleness"), } - return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/cluster.py index 04d2f26a7..0c775ab92 100644 --- a/dbt/adapters/bigquery/relation_configs/cluster.py +++ b/dbt/adapters/bigquery/relation_configs/cluster.py @@ -2,7 +2,7 @@ from typing import Any, Dict, FrozenSet import agate -from dbt.adapters.relation_configs import RelationConfigChange, RelationResults +from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes 
import ModelNode from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase @@ -40,12 +40,9 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - relation_results_entry: agate.Row = cls._get_first_row(relation_results["relation"]) # type: ignore - + def parse_relation_results(cls, relation_results_entry: agate.Row) -> Dict[str, Any]: # type: ignore field_list = relation_results_entry.get("cluster_by", "") config_dict = {"fields": frozenset(field_list.split(","))} - return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index 231c63166..b106d36d1 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -4,7 +4,7 @@ import agate -from dbt.adapters.relation_configs import RelationConfigChange, RelationResults +from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode from dbt.dataclass_schema import StrEnum @@ -80,13 +80,11 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - relation_results_entry: agate.Row = cls._get_first_row(relation_results["relation"]) # type: ignore - + def parse_relation_results(cls, relation_results_entry: agate.Row) -> Dict[str, Any]: # type: ignore config_dict = { - "field": relation_results_entry.get("field"), - "data_type": relation_results_entry.get("data_type"), - "granularity": relation_results_entry.get("granularity"), + "field": relation_results_entry.get("partition_field"), + "data_type": relation_results_entry.get("partition_data_type"), + "granularity": relation_results_entry.get("partition_granularity"), } # combine range fields 
into dictionary, like the model config From 3a682b4c1bcdd7f83b64b7d9d356c5ab628cf2ba Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 21:30:52 -0400 Subject: [PATCH 24/53] update how defaults are handled --- .../relation_configs/materialized_view.py | 93 ++++++++++--------- 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index a0741bd3b..b88921c63 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,12 +1,12 @@ from dataclasses import dataclass -from typing import Dict, Optional +from typing import Any, Dict, Optional import agate -from dbt.exceptions import DbtRuntimeError -from dbt.adapters.relation_configs.config_base import RelationResults -from dbt.adapters.relation_configs.config_validation import RelationConfigValidationMixin +from dbt.adapters.relation_configs import RelationResults from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import ComponentName +from dbt.exceptions import DbtRuntimeError + from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase from dbt.adapters.bigquery.relation_configs.auto_refresh import ( BigQueryAutoRefreshConfig, @@ -23,37 +23,37 @@ @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase, RelationConfigValidationMixin): +class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): """ This config follow the specs found here: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_materialized_view_statement The following parameters are configurable by dbt: - - materialized_view_name: Name of the materialized view - - schema: Dataset name of the materialized view - - database: Project name of the database - - hours_to_expiration: 
The time when table expires. - - Note: If not set table persists - - kms_key_name: user defined Cloud KMS encryption key. - - labels: used to organized and group table - - Note on usage can be found - - There are currently no non-configurable parameters. + - materialized_view_name: name of the materialized view + - schema: dataset name of the materialized view + - database: project name of the database + - partition: object containing partition information + - cluster: object containing cluster information + - auto_refresh: object containing refresh scheduling information + - hours_to_expiration: The time when table expires + - kms_key_name: user defined Cloud KMS encryption key + - labels: used to organized and group objects """ materialized_view_name: str schema_name: str database_name: str - cluster: BigQueryClusterConfig = BigQueryClusterConfig() - partition: BigQueryPartitionConfig = BigQueryPartitionConfig() - auto_refresh: BigQueryAutoRefreshConfig = BigQueryAutoRefreshConfig() + partition: Optional[BigQueryPartitionConfig] = None + cluster: Optional[BigQueryClusterConfig] = None + auto_refresh: Optional[BigQueryAutoRefreshConfig] = None hours_to_expiration: Optional[int] = None kms_key_name: Optional[str] = None labels: Optional[Dict[str, str]] = None @classmethod - def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": - kwargs_dict = { + def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConfig": + # required + kwargs_dict: Dict[str, Any] = { "materialized_view_name": cls._render_part( ComponentName.Identifier, config_dict.get("materialized_view_name") ), @@ -61,13 +61,15 @@ def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": "database_name": cls._render_part( ComponentName.Database, config_dict.get("database_name") ), - "hours_to_expiration": config_dict.get("hours_to_expiration"), - "kms_key_name": config_dict.get("kms_key_name"), - "labels": config_dict.get("labels"), } - if auto_refresh := 
config_dict.get("auto_refresh"): - kwargs_dict.update({"auto_refresh": BigQueryAutoRefreshConfig.from_dict(auto_refresh)}) + # optional + if "hours_to_expiration" in config_dict: + kwargs_dict.update({"hours_to_expiration": config_dict.get("hours_to_expiration")}) + if "kms_key_name" in config_dict: + kwargs_dict.update({"kms_key_name": config_dict.get("kms_key_name")}) + if "labels" in config_dict: + kwargs_dict.update({"labels": config_dict.get("labels")}) if partition := config_dict.get("partition"): kwargs_dict.update({"partition": BigQueryPartitionConfig.from_dict(partition)}) @@ -75,11 +77,14 @@ def from_dict(cls, config_dict) -> "BigQueryMaterializedViewConfig": if cluster := config_dict.get("cluster"): kwargs_dict.update({"cluster": BigQueryClusterConfig.from_dict(cluster)}) + if auto_refresh := config_dict.get("auto_refresh"): + kwargs_dict.update({"auto_refresh": BigQueryAutoRefreshConfig.from_dict(auto_refresh)}) + materialized_view: "BigQueryMaterializedViewConfig" = super().from_dict(kwargs_dict) # type: ignore return materialized_view @classmethod - def parse_model_node(cls, model_node: ModelNode) -> dict: + def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = { "materialized_view_name": model_node.identifier, "schema_name": model_node.schema, @@ -89,21 +94,21 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: "labels": model_node.config.extra.get("labels"), } - if model_node.config.get("auto_refresh"): - config_dict.update( - {"auto_refresh": BigQueryAutoRefreshConfig.parse_model_node(model_node)} - ) - - if model_node.config.get("partition"): + if "partition_by" in model_node.config: config_dict.update({"partition": BigQueryPartitionConfig.parse_model_node(model_node)}) - if model_node.config.get("cluster"): + if "cluster_by" in model_node.config: config_dict.update({"cluster": BigQueryClusterConfig.parse_model_node(model_node)}) + if "enable_refresh" in model_node.config: + config_dict.update( + 
{"auto_refresh": BigQueryAutoRefreshConfig.parse_model_node(model_node)} + ) + return config_dict @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> dict: + def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: materialized_view_config = relation_results.get("materialized_view") if isinstance(materialized_view_config, agate.Table): materialized_view = cls._get_first_row(materialized_view_config) @@ -119,23 +124,23 @@ def parse_relation_results(cls, relation_results: RelationResults) -> dict: "labels": materialized_view.get("labels"), } - if materialized_view.get("auto_refresh"): + if materialized_view.get("partition_field"): config_dict.update( - { - "auto_refresh": BigQueryAutoRefreshConfig.parse_relation_results( - materialized_view - ) - } + {"partition": BigQueryPartitionConfig.parse_relation_results(materialized_view)} ) - if materialized_view.get("partition"): + if materialized_view.get("cluster_by"): config_dict.update( - {"partition": BigQueryPartitionConfig.parse_relation_results(materialized_view)} + {"cluster": BigQueryClusterConfig.parse_relation_results(materialized_view)} ) - if materialized_view.get("cluster"): + if materialized_view.get("enable_refresh"): config_dict.update( - {"cluster": BigQueryClusterConfig.parse_relation_results(materialized_view)} + { + "auto_refresh": BigQueryAutoRefreshConfig.parse_relation_results( + materialized_view + ) + } ) return config_dict From 894fdb2ecb69541a75e9fa028c9eac9593bbd2f0 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 21:32:33 -0400 Subject: [PATCH 25/53] add description option to materialized view since it is handled for tables --- dbt/adapters/bigquery/relation_configs/materialized_view.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index b88921c63..76f258dd3 100644 --- 
a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -49,6 +49,7 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): hours_to_expiration: Optional[int] = None kms_key_name: Optional[str] = None labels: Optional[Dict[str, str]] = None + description: Optional[str] = None @classmethod def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConfig": @@ -70,6 +71,8 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf kwargs_dict.update({"kms_key_name": config_dict.get("kms_key_name")}) if "labels" in config_dict: kwargs_dict.update({"labels": config_dict.get("labels")}) + if "description" in config_dict: + kwargs_dict.update({"description": config_dict.get("description")}) if partition := config_dict.get("partition"): kwargs_dict.update({"partition": BigQueryPartitionConfig.from_dict(partition)}) @@ -92,6 +95,7 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: "hours_to_expiration": model_node.config.extra.get("hours_to_expiration"), "kms_key_name": model_node.config.extra.get("kms_key_name"), "labels": model_node.config.extra.get("labels"), + "description": model_node.config.extra.get("description"), } if "partition_by" in model_node.config: @@ -122,6 +126,7 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, "hours_to_expiration": materialized_view.get("hours_to_expiration"), "kms_key_name": materialized_view.get("kms_key_name"), "labels": materialized_view.get("labels"), + "description": materialized_view.get("description"), } if materialized_view.get("partition_field"): From 95c6c013e63b8df1009190b3a4ff646b0c890bcc Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 27 Sep 2023 21:32:56 -0400 Subject: [PATCH 26/53] add description option to materialized view since it is handled for tables --- dbt/adapters/bigquery/relation_configs/materialized_view.py | 1 + 1 file 
changed, 1 insertion(+) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 76f258dd3..f2626753e 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -38,6 +38,7 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): - hours_to_expiration: The time when table expires - kms_key_name: user defined Cloud KMS encryption key - labels: used to organized and group objects + - description: user description for materialized view """ materialized_view_name: str From 3ea6cbec6c47cd48cb5f17b7db297af0d5b9fdb3 Mon Sep 17 00:00:00 2001 From: Matthew McKnight Date: Thu, 28 Sep 2023 01:01:02 -0500 Subject: [PATCH 27/53] fix method call chain in parse_relation_results on cluster, partition, and auto_refresh --- dbt/adapters/bigquery/relation_configs/cluster.py | 2 +- dbt/adapters/bigquery/relation_configs/partition.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/cluster.py index 1c35996a2..162ec3ff3 100644 --- a/dbt/adapters/bigquery/relation_configs/cluster.py +++ b/dbt/adapters/bigquery/relation_configs/cluster.py @@ -39,7 +39,7 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: config_dict = {} - if cluster_by := relation_results_entry.config.extra.get("cluster_by"): + if cluster_by := relation_results_entry.get("cluster_by"): config_dict.update({"cluster_by": cluster_by}) return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index ee51fd203..b95f048a2 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -48,12 +48,10 @@ def 
parse_model_node(cls, model_node: ModelNode) -> dict: def parse_relation_results(cls, relation_results_entry: agate.Row) -> dict: config_dict = {} - if partition_by := relation_results_entry.config.extra.get("partition_by"): + if partition_by := relation_results_entry.get("partition_by"): config_dict.update({"partition_by": partition_by}) - if partition_expiration_days := relation_results_entry.config.extra.get( - "partition_expiration_days" - ): + if partition_expiration_days := relation_results_entry.get("partition_expiration_days"): config_dict.update({"partition_expiration_days": partition_expiration_days}) return config_dict From 8a31379746f0cf7397331aee984579b2808593b7 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 28 Sep 2023 18:41:38 -0400 Subject: [PATCH 28/53] move PartitionConfig into relation_configs to be used by materialized views, update references --- .../bigquery/relation_configs/partition.py | 183 ++++++++++-------- tests/unit/test_bigquery_adapter.py | 5 +- 2 files changed, 109 insertions(+), 79 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index b106d36d1..c99d70235 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -1,97 +1,128 @@ -from collections import namedtuple from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional import agate - -from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode -from dbt.dataclass_schema import StrEnum - -from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase - - -class PartitionDataType(StrEnum): - TIMESTAMP = "timestamp" - DATE = "date" - DATETIME = "datetime" - INT64 = "int64" - - -class PartitionGranularity(StrEnum): - HOUR = "hour" - DAY = "day" - MONTH = "month" - YEAR = "year" - - -PartitionRange 
= namedtuple("PartitionRange", ["start", "end", "interval"]) - +from dbt.dataclass_schema import dbtClassMixin, ValidationError +import dbt.exceptions +from dbt.adapters.relation_configs import RelationConfigChange -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryPartitionConfig(BigQueryRelationConfigBase): - """ - This config manages table options supporting partitioning. See the following for more information: - - https://docs.getdbt.com/reference/resource-configs/bigquery-configs#using-table-partitioning-and-clustering - - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#partition_expression - - - field: field to partition on - - Note: Must be partitioned in the same way as base table - - data_type: data type of `field` - - granularity: size of the buckets for non-int64 `data_type` - - range: size of the buckets for int64 `data_type` - - time_ingestion_partitioning: supports partitioning by row creation time - """ +@dataclass +class PartitionConfig(dbtClassMixin): field: str - data_type: PartitionDataType - granularity: Optional[PartitionGranularity] = None - range: Optional[PartitionRange] = None - time_ingestion_partitioning: Optional[bool] = False + data_type: str = "date" + granularity: str = "day" + range: Optional[Dict[str, Any]] = None + time_ingestion_partitioning: bool = False + copy_partitions: bool = False + + PARTITION_DATE = "_PARTITIONDATE" + PARTITION_TIME = "_PARTITIONTIME" + + def data_type_for_partition(self): + """Return the data type of partitions for replacement. + When time_ingestion_partitioning is enabled, the data type supported are date & timestamp. 
+ """ + if not self.time_ingestion_partitioning: + return self.data_type + + return "date" if self.data_type == "date" else "timestamp" + + def reject_partition_field_column(self, columns: List[Any]) -> List[str]: + return [c for c in columns if not c.name.upper() == self.field.upper()] + + def data_type_should_be_truncated(self): + """Return true if the data type should be truncated instead of cast to the data type.""" + return not ( + self.data_type == "int64" or (self.data_type == "date" and self.granularity == "day") + ) + + def time_partitioning_field(self) -> str: + """Return the time partitioning field name based on the data type. + The default is _PARTITIONTIME, but for date it is _PARTITIONDATE + else it will fail statements for type mismatch.""" + if self.data_type == "date": + return self.PARTITION_DATE + else: + return self.PARTITION_TIME + + def insertable_time_partitioning_field(self) -> str: + """Return the insertable time partitioning field name based on the data type. + Practically, only _PARTITIONTIME works so far. 
+ The function is meant to keep the call sites consistent as it might evolve.""" + return self.PARTITION_TIME + + def render(self, alias: Optional[str] = None): + column: str = ( + self.field if not self.time_ingestion_partitioning else self.time_partitioning_field() + ) + if alias: + column = f"{alias}.{column}" + + if self.data_type_should_be_truncated(): + return f"{self.data_type}_trunc({column}, {self.granularity})" + else: + return column + + def render_wrapped(self, alias: Optional[str] = None): + """Wrap the partitioning column when time involved to ensure it is properly cast to matching time.""" + # if data type is going to be truncated, no need to wrap + if ( + self.data_type in ("date", "timestamp", "datetime") + and not self.data_type_should_be_truncated() + and not ( + self.time_ingestion_partitioning and self.data_type == "date" + ) # _PARTITIONDATE is already a date + ): + return f"{self.data_type}({self.render(alias)})" + else: + return self.render(alias) @classmethod - def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryPartitionConfig": - # required - kwargs_dict = { - "field": config_dict.get("field"), - "data_type": config_dict.get("data_type"), - } - - # optional - if granularity := config_dict.get("granularity"): - config_dict.update({"granularity": granularity}) - if partition_range := config_dict.get("range"): - config_dict.update({"range": PartitionRange(**partition_range)}) - - partition: "BigQueryPartitionConfig" = super().from_dict(kwargs_dict) # type: ignore - return partition + def parse(cls, raw_partition_by) -> Optional["PartitionConfig"]: + if raw_partition_by is None: + return None + try: + cls.validate(raw_partition_by) + return cls.from_dict( + { + key: (value.lower() if isinstance(value, str) else value) + for key, value in raw_partition_by.items() + } + ) + except ValidationError as exc: + raise dbt.exceptions.DbtValidationError("Could not parse partition config") from exc + except TypeError: + raise 
dbt.exceptions.CompilationError( + f"Invalid partition_by config:\n" + f" Got: {raw_partition_by}\n" + f' Expected a dictionary with "field" and "data_type" keys' + ) @classmethod def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: - partition_by = model_node.config.extra.get("partition_by", {}) - - config_dict = { - "field": partition_by.get("field"), - "data_type": partition_by.get("data_type"), - "granularity": partition_by.get("granularity"), - "range": partition_by.get("range", {}), - } - - return config_dict + """ + Parse model node into a raw config for `PartitionConfig.parse` + """ + return model_node.config.extra.get("partition_by") @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> Dict[str, Any]: # type: ignore + def parse_relation_results(cls, describe_relation_results: agate.Row) -> Dict[str, Any]: + """ + Parse the results of a describe query into a raw config for `PartitionConfig.parse` + """ config_dict = { - "field": relation_results_entry.get("partition_field"), - "data_type": relation_results_entry.get("partition_data_type"), - "granularity": relation_results_entry.get("partition_granularity"), + "field": describe_relation_results.get("partition_field"), + "data_type": describe_relation_results.get("partition_data_type"), + "granularity": describe_relation_results.get("partition_granularity"), } # combine range fields into dictionary, like the model config range_dict = { - "start": relation_results_entry.get("partition_start"), - "end": relation_results_entry.get("partition_end"), - "interval": relation_results_entry.get("partition_interval"), + "start": describe_relation_results.get("partition_start"), + "end": describe_relation_results.get("partition_end"), + "interval": describe_relation_results.get("partition_interval"), } config_dict.update({"range": range_dict}) @@ -100,7 +131,7 @@ def parse_relation_results(cls, relation_results_entry: agate.Row) -> Dict[str, @dataclass(frozen=True, 
eq=True, unsafe_hash=True) class BigQueryPartitionConfigChange(RelationConfigChange): - context: BigQueryPartitionConfig + context: PartitionConfig @property def requires_full_refresh(self) -> bool: diff --git a/tests/unit/test_bigquery_adapter.py b/tests/unit/test_bigquery_adapter.py index 1a9805fd5..926547e10 100644 --- a/tests/unit/test_bigquery_adapter.py +++ b/tests/unit/test_bigquery_adapter.py @@ -8,9 +8,8 @@ from unittest.mock import patch, MagicMock, create_autospec import dbt.dataclass_schema -from dbt.adapters.bigquery import PartitionConfig -from dbt.adapters.bigquery import BigQueryAdapter -from dbt.adapters.bigquery import BigQueryRelation +from dbt.adapters.bigquery.relation_configs import PartitionConfig +from dbt.adapters.bigquery import BigQueryAdapter, BigQueryRelation from dbt.adapters.bigquery import Plugin as BigQueryPlugin from google.cloud.bigquery.table import Table from dbt.adapters.bigquery.connections import _sanitize_label, _VALIDATE_LABEL_LENGTH_LIMIT From 630913c93c6668fe603796260b28fbdcbe62b7e5 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 28 Sep 2023 18:52:10 -0400 Subject: [PATCH 29/53] move PartitionConfig into relation_configs to be used by materialized views, update references; add get_materialized_view_options in alignment with get_table_options; fix wild import order; add factory method for materialized views to be used in the jinja template; update expiration timestamp attribute; --- dbt/adapters/bigquery/impl.py | 188 ++++++------------ dbt/adapters/bigquery/relation.py | 30 ++- .../bigquery/relation_configs/__init__.py | 13 +- .../relation_configs/materialized_view.py | 56 ++++-- 4 files changed, 132 insertions(+), 155 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 8fc1b69bb..2b62bd288 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -1,58 +1,51 @@ from dataclasses import dataclass +import json import threading -from typing import Dict, 
List, Optional, Any, Set, Union, Type - -from dbt.contracts.connection import AdapterResponse -from dbt.contracts.graph.nodes import ColumnLevelConstraint, ModelLevelConstraint, ConstraintType # type: ignore -from dbt.dataclass_schema import dbtClassMixin, ValidationError - -import dbt.deprecations -import dbt.exceptions -import dbt.clients.agate_helper +import time +from typing import Any, Dict, List, Optional, Type, Set, Union +import agate from dbt import ui # type: ignore from dbt.adapters.base import ( # type: ignore + AdapterConfig, BaseAdapter, + BaseRelation, ConstraintSupport, - available, + PythonJobHelper, RelationType, - BaseRelation, SchemaSearchMap, - AdapterConfig, - PythonJobHelper, + available, ) - from dbt.adapters.cache import _make_ref_key_dict # type: ignore - -from dbt.adapters.bigquery.column import get_nested_column_data_types -from dbt.adapters.bigquery.relation import BigQueryRelation -from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset -from dbt.adapters.bigquery import BigQueryColumn -from dbt.adapters.bigquery import BigQueryConnectionManager -from dbt.adapters.bigquery.python_submissions import ( - ClusterDataprocHelper, - ServerlessDataProcHelper, -) -from dbt.adapters.bigquery.connections import BigQueryAdapterResponse +import dbt.clients.agate_helper +from dbt.contracts.connection import AdapterResponse from dbt.contracts.graph.manifest import Manifest -from dbt.events import ( - AdapterLogger, -) +from dbt.contracts.graph.nodes import ColumnLevelConstraint, ConstraintType, ModelLevelConstraint # type: ignore +from dbt.dataclass_schema import dbtClassMixin +import dbt.deprecations +from dbt.events import AdapterLogger from dbt.events.functions import fire_event from dbt.events.types import SchemaCreation, SchemaDrop +import dbt.exceptions from dbt.utils import filter_null_values - -import google.auth import google.api_core +import google.auth import google.oauth2 -import 
google.cloud.exceptions import google.cloud.bigquery - from google.cloud.bigquery import AccessEntry, SchemaField +import google.cloud.exceptions + +from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager +from dbt.adapters.bigquery.column import get_nested_column_data_types +from dbt.adapters.bigquery.connections import BigQueryAdapterResponse +from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset +from dbt.adapters.bigquery.python_submissions import ( + ClusterDataprocHelper, + ServerlessDataProcHelper, +) +from dbt.adapters.bigquery.relation import BigQueryRelation +from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig, PartitionConfig -import time -import agate -import json logger = AdapterLogger("BigQuery") @@ -70,99 +63,6 @@ def sql_escape(string): return json.dumps(string)[1:-1] -@dataclass -class PartitionConfig(dbtClassMixin): - field: str - data_type: str = "date" - granularity: str = "day" - range: Optional[Dict[str, Any]] = None - time_ingestion_partitioning: bool = False - copy_partitions: bool = False - - PARTITION_DATE = "_PARTITIONDATE" - PARTITION_TIME = "_PARTITIONTIME" - - def data_type_for_partition(self): - """Return the data type of partitions for replacement. - When time_ingestion_partitioning is enabled, the data type supported are date & timestamp. 
- """ - if not self.time_ingestion_partitioning: - return self.data_type - - return "date" if self.data_type == "date" else "timestamp" - - def reject_partition_field_column(self, columns: List[Any]) -> List[str]: - return [c for c in columns if not c.name.upper() == self.field.upper()] - - def data_type_should_be_truncated(self): - """Return true if the data type should be truncated instead of cast to the data type.""" - return not ( - self.data_type == "int64" or (self.data_type == "date" and self.granularity == "day") - ) - - def time_partitioning_field(self) -> str: - """Return the time partitioning field name based on the data type. - The default is _PARTITIONTIME, but for date it is _PARTITIONDATE - else it will fail statements for type mismatch.""" - if self.data_type == "date": - return self.PARTITION_DATE - else: - return self.PARTITION_TIME - - def insertable_time_partitioning_field(self) -> str: - """Return the insertable time partitioning field name based on the data type. - Practically, only _PARTITIONTIME works so far. 
- The function is meant to keep the call sites consistent as it might evolve.""" - return self.PARTITION_TIME - - def render(self, alias: Optional[str] = None): - column: str = ( - self.field if not self.time_ingestion_partitioning else self.time_partitioning_field() - ) - if alias: - column = f"{alias}.{column}" - - if self.data_type_should_be_truncated(): - return f"{self.data_type}_trunc({column}, {self.granularity})" - else: - return column - - def render_wrapped(self, alias: Optional[str] = None): - """Wrap the partitioning column when time involved to ensure it is properly cast to matching time.""" - # if data type is going to be truncated, no need to wrap - if ( - self.data_type in ("date", "timestamp", "datetime") - and not self.data_type_should_be_truncated() - and not ( - self.time_ingestion_partitioning and self.data_type == "date" - ) # _PARTITIONDATE is already a date - ): - return f"{self.data_type}({self.render(alias)})" - else: - return self.render(alias) - - @classmethod - def parse(cls, raw_partition_by) -> Optional["PartitionConfig"]: - if raw_partition_by is None: - return None - try: - cls.validate(raw_partition_by) - return cls.from_dict( - { - key: (value.lower() if isinstance(value, str) else value) - for key, value in raw_partition_by.items() - } - ) - except ValidationError as exc: - raise dbt.exceptions.DbtValidationError("Could not parse partition config") from exc - except TypeError: - raise dbt.exceptions.CompilationError( - f"Invalid partition_by config:\n" - f" Got: {raw_partition_by}\n" - f' Expected a dictionary with "field" and "data_type" keys' - ) - - @dataclass class GrantTarget(dbtClassMixin): dataset: str @@ -849,6 +749,38 @@ def get_view_options(self, config: Dict[str, Any], node: Dict[str, Any]) -> Dict opts = self.get_common_options(config, node) return opts + @available.parse(lambda *a, **k: {}) + def get_materialized_view_options( + self, + materialized_view: BigQueryMaterializedViewConfig, + ) -> Dict[str, Any]: + opts: 
Dict[str, Any] = {} + + if expiration_timestamp := materialized_view.expiration_timestamp: + opts.update({"expiration_timestamp": expiration_timestamp}) + + if description := materialized_view.description: + escaped_description = sql_escape(description) + opts.update({"description": f'"""{escaped_description}"""'}) + + if labels := materialized_view.labels: + opts.update({"labels": list(labels.items())}) + + if kms_key_name := materialized_view.kms_key_name: + opts.update({"kms_key_name": f"'{kms_key_name}'"}) + + if auto_refresh := materialized_view.auto_refresh: + opts.update( + { + "enable_refresh": auto_refresh.enable_refresh, + "refresh_interval_minutes": auto_refresh.refresh_interval_minutes, + } + ) + if max_staleness := auto_refresh.max_staleness: + opts.update({"max_staleness": max_staleness}) + + return opts + @available.parse_none def grant_access_to(self, entity, entity_type, role, grant_target_dict): """ diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 9fa7145ab..eed27b254 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -14,6 +14,7 @@ BigQueryClusterConfigChange, BigQueryPartitionConfigChange, ) +from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import RelationType from dbt.exceptions import CompilationError from dbt.utils import filter_null_values @@ -65,33 +66,46 @@ def project(self): def dataset(self): return self.schema + @classmethod + def materialized_view_from_model_node( + cls, model_node: ModelNode + ) -> BigQueryMaterializedViewConfig: + return BigQueryMaterializedViewConfig.from_model_node(model_node) + @classmethod def materialized_view_config_changeset( - cls, relaation_results: RelationResults, runtime_config: RuntimeConfigObject + cls, relation_results: RelationResults, runtime_config: RuntimeConfigObject ) -> Optional[BigQueryMaterializedViewConfigChangeset]: config_change_collection = BigQueryMaterializedViewConfigChangeset() 
existing_materialized_view = BigQueryMaterializedViewConfig.from_relation_results( - relaation_results - ) - new_materialized_view = BigQueryMaterializedViewConfig.from_model_node( - runtime_config.model + relation_results ) + new_materialized_view = cls.materialized_view_from_model_node(runtime_config.model) assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) assert isinstance(new_materialized_view, BigQueryMaterializedViewConfig) - if new_materialized_view.auto_refresh != existing_materialized_view.auto_refresh: + if ( + new_materialized_view.auto_refresh != existing_materialized_view.auto_refresh + and new_materialized_view.auto_refresh + ): config_change_collection.auto_refresh = BigQueryAutoRefreshConfigChange( action=RelationConfigChangeAction.alter, context=new_materialized_view.auto_refresh, ) - if new_materialized_view.cluster != existing_materialized_view.cluster: + if ( + new_materialized_view.cluster != existing_materialized_view.cluster + and new_materialized_view.cluster + ): config_change_collection.cluster = BigQueryClusterConfigChange( action=RelationConfigChangeAction.alter, context=new_materialized_view.cluster, ) - if new_materialized_view.partition != existing_materialized_view.partition: + if ( + new_materialized_view.partition != existing_materialized_view.partition + and new_materialized_view.partition + ): config_change_collection.partition = BigQueryPartitionConfigChange( action=RelationConfigChangeAction.alter, context=new_materialized_view.partition, diff --git a/dbt/adapters/bigquery/relation_configs/__init__.py b/dbt/adapters/bigquery/relation_configs/__init__.py index f3b8ae50b..e50246bbf 100644 --- a/dbt/adapters/bigquery/relation_configs/__init__.py +++ b/dbt/adapters/bigquery/relation_configs/__init__.py @@ -1,8 +1,17 @@ +from dbt.adapters.bigquery.relation_configs.auto_refresh import ( + BigQueryAutoRefreshConfig, + BigQueryAutoRefreshConfigChange, +) +from dbt.adapters.bigquery.relation_configs.cluster 
import ( + BigQueryClusterConfig, + BigQueryClusterConfigChange, +) from dbt.adapters.bigquery.relation_configs.materialized_view import ( BigQueryMaterializedViewConfig, BigQueryMaterializedViewConfigChangeset, - BigQueryAutoRefreshConfigChange, - BigQueryClusterConfigChange, +) +from dbt.adapters.bigquery.relation_configs.partition import ( + PartitionConfig, BigQueryPartitionConfigChange, ) from dbt.adapters.bigquery.relation_configs.policies import ( diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index f2626753e..7fdf4477a 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from datetime import datetime, timedelta from typing import Any, Dict, Optional import agate @@ -13,7 +14,7 @@ BigQueryAutoRefreshConfigChange, ) from dbt.adapters.bigquery.relation_configs.partition import ( - BigQueryPartitionConfig, + PartitionConfig, BigQueryPartitionConfigChange, ) from dbt.adapters.bigquery.relation_configs.cluster import ( @@ -44,10 +45,10 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): materialized_view_name: str schema_name: str database_name: str - partition: Optional[BigQueryPartitionConfig] = None + partition: Optional[PartitionConfig] = None cluster: Optional[BigQueryClusterConfig] = None auto_refresh: Optional[BigQueryAutoRefreshConfig] = None - hours_to_expiration: Optional[int] = None + expiration_timestamp: Optional[datetime] = None kms_key_name: Optional[str] = None labels: Optional[Dict[str, str]] = None description: Optional[str] = None @@ -66,17 +67,19 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf } # optional - if "hours_to_expiration" in config_dict: - kwargs_dict.update({"hours_to_expiration": config_dict.get("hours_to_expiration")}) - if "kms_key_name" in config_dict: - 
kwargs_dict.update({"kms_key_name": config_dict.get("kms_key_name")}) - if "labels" in config_dict: - kwargs_dict.update({"labels": config_dict.get("labels")}) - if "description" in config_dict: - kwargs_dict.update({"description": config_dict.get("description")}) + optional_attributes = [ + "expiration_timestamp", + "kms_key_name", + "labels", + "description", + ] + optional_attributes_set_by_user = { + k: v for k, v in config_dict.items() if k in optional_attributes + } + kwargs_dict.update(optional_attributes_set_by_user) if partition := config_dict.get("partition"): - kwargs_dict.update({"partition": BigQueryPartitionConfig.from_dict(partition)}) + kwargs_dict.update({"partition": PartitionConfig.parse(partition)}) if cluster := config_dict.get("cluster"): kwargs_dict.update({"cluster": BigQueryClusterConfig.from_dict(cluster)}) @@ -87,20 +90,39 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf materialized_view: "BigQueryMaterializedViewConfig" = super().from_dict(kwargs_dict) # type: ignore return materialized_view + @classmethod + def from_model_node(cls, model_node: ModelNode) -> "BigQueryMaterializedViewConfig": + materialized_view = super().from_model_node(model_node) + if isinstance(materialized_view, BigQueryMaterializedViewConfig): + return materialized_view + else: + raise DbtRuntimeError( + f"An unexpected error occurred in BigQueryMaterializedViewConfig.from_model_node:\n" + f" Expected: BigQueryMaterializedViewConfig\n" + f" Actual: {materialized_view}" + ) + @classmethod def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = { "materialized_view_name": model_node.identifier, "schema_name": model_node.schema, "database_name": model_node.database, - "hours_to_expiration": model_node.config.extra.get("hours_to_expiration"), "kms_key_name": model_node.config.extra.get("kms_key_name"), "labels": model_node.config.extra.get("labels"), - "description": 
model_node.config.extra.get("description"), } + if description := model_node.config.extra.get("description"): + if model_node.config.persist_docs: + config_dict.update({"description": description}) + + if hours_to_expiration := model_node.config.extra.get("hours_to_expiration"): + config_dict.update( + {"expiration_timestamp": datetime.now() + timedelta(hours=hours_to_expiration)} + ) + if "partition_by" in model_node.config: - config_dict.update({"partition": BigQueryPartitionConfig.parse_model_node(model_node)}) + config_dict.update({"partition": PartitionConfig.parse_model_node(model_node)}) if "cluster_by" in model_node.config: config_dict.update({"cluster": BigQueryClusterConfig.parse_model_node(model_node)}) @@ -124,7 +146,7 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, "materialized_view_name": materialized_view.get("materialized_view_name"), "schema_name": materialized_view.get("schema"), "database_name": materialized_view.get("database"), - "hours_to_expiration": materialized_view.get("hours_to_expiration"), + "expiration_timestamp": materialized_view.get("expiration_timestamp"), "kms_key_name": materialized_view.get("kms_key_name"), "labels": materialized_view.get("labels"), "description": materialized_view.get("description"), @@ -132,7 +154,7 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, if materialized_view.get("partition_field"): config_dict.update( - {"partition": BigQueryPartitionConfig.parse_relation_results(materialized_view)} + {"partition": PartitionConfig.parse_relation_results(materialized_view)} ) if materialized_view.get("cluster_by"): From 97b025c5f7d89aadd5b49ec98526200bb25495bb Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 28 Sep 2023 18:52:48 -0400 Subject: [PATCH 30/53] update create materialized view to use the relation config --- .../macros/relations/materialized_view/create.sql | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff 
--git a/dbt/include/bigquery/macros/relations/materialized_view/create.sql b/dbt/include/bigquery/macros/relations/materialized_view/create.sql index 19be74374..b45b2fe8a 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/create.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/create.sql @@ -1,17 +1,11 @@ {% macro bigquery__get_create_materialized_view_as_sql(relation, sql) %} - {%- set partition_config_raw = config.get('partition_by', none) -%} - {%- set partition_config = adapter.parse_partition_by(partition_config_raw) -%} - {%- if partition_config.time_ingestion_partitioning -%} - {% do exceptions.raise_compiler_error("Time ingestion partitioning is not supported for materialized views") %} - {%- endif -%} - - {%- set cluster_config = config.get('cluster_by', none) -%} + {%- set materialized_view = adapter.Relation.materialized_view_from_model_node(model) -%} create materialized view if not exists {{ relation }} - {{ partition_by(partition_config) }} - {{ cluster_by(cluster_config) }} - {{ bigquery_options(adapter.materialized_view_options(config, model)) }} + {{ partition_by(materialized_view.partition) }} + {{ cluster_by(materialized_view.cluster.fields) }} + {{ bigquery_options(adapter.materialized_view_options(materialized_view)) }} as {{ sql }} {% endmacro %} From d08c54b128452300e7f54de81582d627404b0d43 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 28 Sep 2023 19:20:52 -0400 Subject: [PATCH 31/53] condition on existence of properties before templating them --- .../macros/relations/materialized_view/create.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/create.sql b/dbt/include/bigquery/macros/relations/materialized_view/create.sql index b45b2fe8a..c8ccf0676 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/create.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/create.sql @@ -1,11 +1,11 @@ {% 
macro bigquery__get_create_materialized_view_as_sql(relation, sql) %} - {%- set materialized_view = adapter.Relation.materialized_view_from_model_node(model) -%} + {%- set materialized_view = adapter.Relation.materialized_view_from_model_node(config.model) -%} create materialized view if not exists {{ relation }} - {{ partition_by(materialized_view.partition) }} - {{ cluster_by(materialized_view.cluster.fields) }} - {{ bigquery_options(adapter.materialized_view_options(materialized_view)) }} + {% if materialized_view.partition %}{{ partition_by(materialized_view.partition) }}{% endif %} + {% if materialized_view.cluster %}{{ cluster_by(materialized_view.cluster.fields) }}{% endif %} + {{ bigquery_options(adapter.get_materialized_view_options(materialized_view)) }} as {{ sql }} {% endmacro %} From 630145d7ba6ed449f308026400cc52983feace00 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 28 Sep 2023 19:21:23 -0400 Subject: [PATCH 32/53] allow for "drop if exists" functionality via the google sdk --- dbt/adapters/bigquery/impl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 2b62bd288..3248cc26d 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -141,7 +141,9 @@ def drop_relation(self, relation: BigQueryRelation) -> None: conn = self.connections.get_thread_connection() table_ref = self.get_table_ref_from_relation(relation) - conn.handle.delete_table(table_ref) + + # mimic "drop if exists" functionality that's ubiquitous in most sql implementations + conn.handle.delete_table(table_ref, not_found_ok=True) def truncate_relation(self, relation: BigQueryRelation) -> None: raise dbt.exceptions.NotImplementedError("`truncate` is not implemented for this adapter!") From 288afe21f105fdb2712f9b0e213779581d9a0499 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 28 Sep 2023 20:00:58 -0400 Subject: [PATCH 33/53] remove unnecessary trailing semicolon 
--- .../bigquery/macros/relations/materialized_view/refresh.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql b/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql index 8daae92ed..82bf819cd 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/refresh.sql @@ -1,3 +1,3 @@ {% macro bigquery__refresh_materialized_view(relation) %} - call bq.refresh_materialized_view('{{ relation }}'); + call bq.refresh_materialized_view('{{ relation }}') {% endmacro %} From 37b2ccd3777e729bd43be0d0bda63dfb63930f49 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 28 Sep 2023 20:01:30 -0400 Subject: [PATCH 34/53] implement replace based on create --- .../macros/relations/materialized_view/_replace.sql | 7 ------- .../macros/relations/materialized_view/replace.sql | 11 +++++++++++ 2 files changed, 11 insertions(+), 7 deletions(-) delete mode 100644 dbt/include/bigquery/macros/relations/materialized_view/_replace.sql create mode 100644 dbt/include/bigquery/macros/relations/materialized_view/replace.sql diff --git a/dbt/include/bigquery/macros/relations/materialized_view/_replace.sql b/dbt/include/bigquery/macros/relations/materialized_view/_replace.sql deleted file mode 100644 index c9df0aef6..000000000 --- a/dbt/include/bigquery/macros/relations/materialized_view/_replace.sql +++ /dev/null @@ -1,7 +0,0 @@ -{% macro bigquery__get_replace_materialized_view_as_sql( - relation, - sql -) %} - {{ get_drop_sql(existing_relation) }} - {{ get_create_materialized_view_as_sql(relation, sql) }} -{% endmacro %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/replace.sql b/dbt/include/bigquery/macros/relations/materialized_view/replace.sql new file mode 100644 index 000000000..ac76b5c9f --- /dev/null +++ b/dbt/include/bigquery/macros/relations/materialized_view/replace.sql @@ -0,0 
+1,11 @@ +{% macro bigquery__get_replace_materialized_view_as_sql(relation, sql) %} + + {%- set materialized_view = adapter.Relation.materialized_view_from_model_node(config.model) -%} + + create or replace materialized view if not exists {{ relation }} + {% if materialized_view.partition %}{{ partition_by(materialized_view.partition) }}{% endif %} + {% if materialized_view.cluster %}{{ cluster_by(materialized_view.cluster.fields) }}{% endif %} + {{ bigquery_options(adapter.get_materialized_view_options(materialized_view)) }} + as {{ sql }} + +{% endmacro %} From 0696752fd22563f0269c967c5f5c026a6a636c8b Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 00:17:56 -0400 Subject: [PATCH 35/53] implement clustering, partitioning, and auto refresh for materialized views --- .../bigquery/relation_configs/auto_refresh.py | 14 ++- .../bigquery/relation_configs/cluster.py | 5 +- .../relation_configs/materialized_view.py | 108 +++++++++--------- .../bigquery/relation_configs/partition.py | 4 +- .../relation_components/cluster/describe.sql | 15 +++ .../relation_components/options/describe.sql | 15 +++ .../partition/describe.sql | 26 +++++ .../relations/materialized_view/alter.sql | 4 +- .../relations/materialized_view/describe.sql | 55 +++------ .../adapter/materialized_view_tests/_files.py | 43 +++++++ .../test_materialized_view.py | 63 +++++++--- .../adapter/materialized_view_tests/utils.py | 13 --- 12 files changed, 237 insertions(+), 128 deletions(-) create mode 100644 dbt/include/bigquery/macros/relation_components/cluster/describe.sql create mode 100644 dbt/include/bigquery/macros/relation_components/options/describe.sql create mode 100644 dbt/include/bigquery/macros/relation_components/partition/describe.sql create mode 100644 tests/functional/adapter/materialized_view_tests/_files.py delete mode 100644 tests/functional/adapter/materialized_view_tests/utils.py diff --git a/dbt/adapters/bigquery/relation_configs/auto_refresh.py 
b/dbt/adapters/bigquery/relation_configs/auto_refresh.py index 05b248787..d3b1111b5 100644 --- a/dbt/adapters/bigquery/relation_configs/auto_refresh.py +++ b/dbt/adapters/bigquery/relation_configs/auto_refresh.py @@ -2,7 +2,7 @@ from typing import Any, Dict, Optional import agate -from dbt.adapters.relation_configs import RelationConfigChange +from dbt.adapters.relation_configs import RelationConfigChange, RelationResults from dbt.contracts.graph.nodes import ModelNode from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase @@ -60,11 +60,15 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> Dict[str, Any]: # type: ignore + def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: + options_config: agate.Table = relation_results.get("options") # type: ignore + options = { + option.get("option_name"): option.get("option_value") for option in options_config + } config_dict = { - "enable_refresh": bool_setting(relation_results_entry.get("enable_refresh")), - "refresh_interval_minutes": relation_results_entry.get("refresh_interval_minutes"), - "max_staleness": relation_results_entry.get("max_staleness"), + "enable_refresh": bool_setting(options.get("enable_refresh")), + "refresh_interval_minutes": options.get("refresh_interval_minutes"), + "max_staleness": options.get("max_staleness"), } return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/cluster.py index 0c775ab92..addf84db6 100644 --- a/dbt/adapters/bigquery/relation_configs/cluster.py +++ b/dbt/adapters/bigquery/relation_configs/cluster.py @@ -40,9 +40,8 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results_entry: agate.Row) -> Dict[str, Any]: # type: ignore - field_list = 
relation_results_entry.get("cluster_by", "") - config_dict = {"fields": frozenset(field_list.split(","))} + def parse_relation_results(cls, relation_results: agate.Table) -> Dict[str, Any]: # type: ignore + config_dict = {"fields": frozenset(row.get("column_name") for row in relation_results)} return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 7fdf4477a..88f1b0f3e 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -33,10 +33,10 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): - materialized_view_name: name of the materialized view - schema: dataset name of the materialized view - database: project name of the database + - auto_refresh: object containing refresh scheduling information - partition: object containing partition information - cluster: object containing cluster information - - auto_refresh: object containing refresh scheduling information - - hours_to_expiration: The time when table expires + - expiration_timestamp: the time when table expires - kms_key_name: user defined Cloud KMS encryption key - labels: used to organized and group objects - description: user description for materialized view @@ -45,9 +45,9 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): materialized_view_name: str schema_name: str database_name: str + auto_refresh: BigQueryAutoRefreshConfig partition: Optional[PartitionConfig] = None cluster: Optional[BigQueryClusterConfig] = None - auto_refresh: Optional[BigQueryAutoRefreshConfig] = None expiration_timestamp: Optional[datetime] = None kms_key_name: Optional[str] = None labels: Optional[Dict[str, str]] = None @@ -58,15 +58,22 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf # required kwargs_dict: Dict[str, Any] = { "materialized_view_name": cls._render_part( - 
ComponentName.Identifier, config_dict.get("materialized_view_name") + ComponentName.Identifier, config_dict["materialized_view_name"] ), - "schema_name": cls._render_part(ComponentName.Schema, config_dict.get("schema_name")), + "schema_name": cls._render_part(ComponentName.Schema, config_dict["schema_name"]), "database_name": cls._render_part( - ComponentName.Database, config_dict.get("database_name") + ComponentName.Database, config_dict["database_name"] ), + "auto_refresh": BigQueryAutoRefreshConfig.from_dict(config_dict["auto_refresh"]), } # optional + if partition := config_dict.get("partition"): + kwargs_dict.update({"partition": PartitionConfig.parse(partition)}) + + if cluster := config_dict.get("cluster"): + kwargs_dict.update({"cluster": BigQueryClusterConfig.from_dict(cluster)}) + optional_attributes = [ "expiration_timestamp", "kms_key_name", @@ -78,15 +85,6 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf } kwargs_dict.update(optional_attributes_set_by_user) - if partition := config_dict.get("partition"): - kwargs_dict.update({"partition": PartitionConfig.parse(partition)}) - - if cluster := config_dict.get("cluster"): - kwargs_dict.update({"cluster": BigQueryClusterConfig.from_dict(cluster)}) - - if auto_refresh := config_dict.get("auto_refresh"): - kwargs_dict.update({"auto_refresh": BigQueryAutoRefreshConfig.from_dict(auto_refresh)}) - materialized_view: "BigQueryMaterializedViewConfig" = super().from_dict(kwargs_dict) # type: ignore return materialized_view @@ -108,79 +106,83 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: "materialized_view_name": model_node.identifier, "schema_name": model_node.schema, "database_name": model_node.database, - "kms_key_name": model_node.config.extra.get("kms_key_name"), - "labels": model_node.config.extra.get("labels"), + "auto_refresh": BigQueryAutoRefreshConfig.parse_model_node(model_node), } - if description := model_node.config.extra.get("description"): - 
if model_node.config.persist_docs: - config_dict.update({"description": description}) - - if hours_to_expiration := model_node.config.extra.get("hours_to_expiration"): - config_dict.update( - {"expiration_timestamp": datetime.now() + timedelta(hours=hours_to_expiration)} - ) - + # optional if "partition_by" in model_node.config: config_dict.update({"partition": PartitionConfig.parse_model_node(model_node)}) if "cluster_by" in model_node.config: config_dict.update({"cluster": BigQueryClusterConfig.parse_model_node(model_node)}) - if "enable_refresh" in model_node.config: + if hours_to_expiration := model_node.config.extra.get("hours_to_expiration"): config_dict.update( - {"auto_refresh": BigQueryAutoRefreshConfig.parse_model_node(model_node)} + {"expiration_timestamp": datetime.now() + timedelta(hours=hours_to_expiration)} ) + if kms_key_name := model_node.config.extra.get("kms_key_name"): + config_dict.update({"kms_key_name": kms_key_name}) + + if labels := model_node.config.extra.get("labels"): + config_dict.update({"labels": labels}) + + if description := model_node.config.extra.get("description"): + if model_node.config.persist_docs: + config_dict.update({"description": description}) + return config_dict @classmethod def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - materialized_view_config = relation_results.get("materialized_view") - if isinstance(materialized_view_config, agate.Table): - materialized_view = cls._get_first_row(materialized_view_config) - else: - raise DbtRuntimeError("Unsupported type returned ex. 
None") + materialized_view_config: agate.Table = relation_results.get("materialized_view") # type: ignore + materialized_view: agate.Row = cls._get_first_row(materialized_view_config) + options_config: agate.Table = relation_results.get("options") # type: ignore + options = { + option.get("option_name"): option.get("option_value") for option in options_config.rows + } config_dict = { - "materialized_view_name": materialized_view.get("materialized_view_name"), - "schema_name": materialized_view.get("schema"), - "database_name": materialized_view.get("database"), - "expiration_timestamp": materialized_view.get("expiration_timestamp"), - "kms_key_name": materialized_view.get("kms_key_name"), - "labels": materialized_view.get("labels"), - "description": materialized_view.get("description"), + "materialized_view_name": materialized_view.get("table_name"), + "schema_name": materialized_view.get("table_schema"), + "database_name": materialized_view.get("table_catalog"), + "auto_refresh": BigQueryAutoRefreshConfig.parse_relation_results(relation_results), } - if materialized_view.get("partition_field"): + # optional + partition_by: agate.Table = relation_results.get("partition_by") # type: ignore + if len(partition_by) > 0: config_dict.update( - {"partition": PartitionConfig.parse_relation_results(materialized_view)} + {"partition": PartitionConfig.parse_relation_results(partition_by[0])} ) - if materialized_view.get("cluster_by"): + cluster_by: agate.Table = relation_results.get("cluster_by") # type: ignore + if len(cluster_by) > 0: config_dict.update( - {"cluster": BigQueryClusterConfig.parse_relation_results(materialized_view)} + {"cluster": BigQueryClusterConfig.parse_relation_results(cluster_by)} ) - if materialized_view.get("enable_refresh"): - config_dict.update( - { - "auto_refresh": BigQueryAutoRefreshConfig.parse_relation_results( - materialized_view - ) - } - ) + config_dict.update( + { + "expiration_timestamp": options.get("expiration_timestamp"), + 
"kms_key_name": options.get("kms_key_name"), + "labels": options.get("labels"), + "description": options.get("description"), + } + ) return config_dict @dataclass class BigQueryMaterializedViewConfigChangeset: + auto_refresh: Optional[BigQueryAutoRefreshConfigChange] = None partition: Optional[BigQueryPartitionConfigChange] = None cluster: Optional[BigQueryClusterConfigChange] = None - auto_refresh: Optional[BigQueryAutoRefreshConfigChange] = None + expiration_timestamp: Optional[datetime] = None kms_key_name: Optional[str] = None labels: Optional[Dict[str, str]] = None + description: Optional[str] = None @property def requires_full_refresh(self) -> bool: diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index c99d70235..e773b9e2c 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -113,9 +113,9 @@ def parse_relation_results(cls, describe_relation_results: agate.Row) -> Dict[st Parse the results of a describe query into a raw config for `PartitionConfig.parse` """ config_dict = { - "field": describe_relation_results.get("partition_field"), + "field": describe_relation_results.get("partition_column_name"), "data_type": describe_relation_results.get("partition_data_type"), - "granularity": describe_relation_results.get("partition_granularity"), + "granularity": describe_relation_results.get("partition_type"), } # combine range fields into dictionary, like the model config diff --git a/dbt/include/bigquery/macros/relation_components/cluster/describe.sql b/dbt/include/bigquery/macros/relation_components/cluster/describe.sql new file mode 100644 index 000000000..43d1eeb9b --- /dev/null +++ b/dbt/include/bigquery/macros/relation_components/cluster/describe.sql @@ -0,0 +1,15 @@ +{% macro bigquery__get_describe_cluster_sql(relation) %} + select + column_name + from {{ relation.information_schema('COLUMNS') }} + where table_name = 
'{{ relation.identifier }}' + and table_schema = '{{ relation.schema }}' + and table_catalog = '{{ relation.database }}' + and clustering_ordinal_position is not null +{% endmacro %} + + +{% macro bigquery__describe_cluster(relation) %} + {%- set _sql = bigquery__get_describe_cluster_sql(relation) -%} + {% do return(run_query(_sql)) %} +{% endmacro %} diff --git a/dbt/include/bigquery/macros/relation_components/options/describe.sql b/dbt/include/bigquery/macros/relation_components/options/describe.sql new file mode 100644 index 000000000..55dbfa947 --- /dev/null +++ b/dbt/include/bigquery/macros/relation_components/options/describe.sql @@ -0,0 +1,15 @@ +{% macro bigquery__get_describe_options_sql(relation) %} + select + option_name, + option_value + from {{ relation.information_schema('TABLE_OPTIONS') }} + where table_name = '{{ relation.identifier }}' + and table_schema = '{{ relation.schema }}' + and table_catalog = '{{ relation.database }}' +{% endmacro %} + + +{% macro bigquery__describe_options(relation) %} + {%- set _sql = bigquery__get_describe_options_sql(relation) -%} + {% do return(run_query(_sql)) %} +{% endmacro %} diff --git a/dbt/include/bigquery/macros/relation_components/partition/describe.sql b/dbt/include/bigquery/macros/relation_components/partition/describe.sql new file mode 100644 index 000000000..90019cbf4 --- /dev/null +++ b/dbt/include/bigquery/macros/relation_components/partition/describe.sql @@ -0,0 +1,26 @@ +{% macro bigquery__get_describe_partition_sql(relation) %} + select + c.column_name as partition_column_name, + c.data_type as partition_data_type, + case + when regexp_contains(p.partition_id, '^[0-9]{4}$') THEN 'year' + when regexp_contains(p.partition_id, '^[0-9]{6}$') THEN 'month' + when regexp_contains(p.partition_id, '^[0-9]{8}$') THEN 'day' + when regexp_contains(p.partition_id, '^[0-9]{10}$') THEN 'hour' + end as partition_type + from {{ relation.information_schema('PARTITIONS') }} p + join {{ 
relation.information_schema('COLUMNS') }} c + on c.table_name = p.table_name + and c.table_schema = p.table_schema + and c.table_catalog = p.table_catalog + where p.table_name = '{{ relation.identifier }}' + and p.table_schema = '{{ relation.schema }}' + and p.table_catalog = '{{ relation.database }}' + and c.is_partitioning_column = 'YES' +{% endmacro %} + + +{% macro bigquery__describe_partition(relation) %} + {% set _sql = bigquery__get_describe_partition_sql(relation) %} + {% do return(run_query(_sql)) %} +{% endmacro %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql index 510b3371e..00f43073b 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql @@ -2,7 +2,9 @@ relation, configuration_changes, sql, - existing_relation + existing_relation, + backup_relation, + intermediate_relation ) %} {% if configuration_changes.requires_full_refresh %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql index 195bd63a2..89e3f4006 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql @@ -1,45 +1,24 @@ {% macro bigquery__describe_materialized_view(relation) %} {%- set _materialized_view_sql -%} - -- checks each column to see if its a cluster_by field then adds it to a new list - with ClusteringColumns as ( - select - table_name, - ARRAY_AGG( - case - when clustering_ordinal_position is not null then column_name - else null - end - ignore nulls - ) as clustering_fields - from - `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.COLUMNS` - where - table_name = '{{ relation.name }}' - GROUP BY - table_name -) select - mv.table_name as materialized_view, - c.column_name, - 
c.is_partitioning_column, - c.clustering_ordinal_position, - topt.option_name, - topt.option_value, - topt.option_type - from - `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.MATERIALIZED_VIEWS` mv - left join - `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.COLUMNS` c - on - mv.table_name = c.table_name - left join - `{{ relation.database }}.{{ relation.schema }}.INFORMATION_SCHEMA.TABLE_OPTIONS` topt - on - mv.table_name = topt.table_name - where - mv.table_name = '{{ relation.name }}' + table_name, + table_schema, + table_catalog + from {{ relation.information_schema('MATERIALIZED_VIEWS') }} + where table_name = '{{ relation.identifier }}' + and table_schema = '{{ relation.schema }}' + and table_catalog = '{{ relation.database }}' {%- endset %} {% set _materialized_view = run_query(_materialized_view_sql) %} - {% do return({'materialized_view': _materialized_viewy}) %} + {%- set _partition_by = bigquery__describe_partition(relation) -%} + {%- set _cluster_by = bigquery__describe_cluster(relation) -%} + {%- set _options = bigquery__describe_options(relation) -%} + + {% do return({ + 'materialized_view': _materialized_view, + 'partition_by': _partition_by, + 'cluster_by': _cluster_by, + 'options': _options + }) %} {% endmacro %} diff --git a/tests/functional/adapter/materialized_view_tests/_files.py b/tests/functional/adapter/materialized_view_tests/_files.py new file mode 100644 index 000000000..6cc69d2c6 --- /dev/null +++ b/tests/functional/adapter/materialized_view_tests/_files.py @@ -0,0 +1,43 @@ +MY_SEED = """ +id,value,record_valid_date +1,100,2023-01-01 00:00:00 +2,200,2023-01-02 00:00:00 +3,300,2023-01-02 00:00:00 +""".strip() + +MY_BASE_TABLE = """ +{{ config( + materialized='table', + partition_by={ + "field": "record_valid_date", + "data_type": "timestamp", + "granularity": "day" + }, + cluster_by=["id", "value"] +) }} +select + id, + value, + record_valid_date +from {{ ref('my_seed') }} +""" + +MY_MATERIALIZED_VIEW 
= """ +{{ config( + materialized='materialized_view', + partition_by={ + "field": "record_valid_date", + "data_type": "timestamp", + "granularity": "day" + }, + cluster_by=["id", "value"], + enable_refresh=True, + refresh_interval_minutes=60, + max_staleness="INTERVAL 45 MINUTE" +) }} +select + id, + value, + record_valid_date +from {{ ref('my_base_table') }} +""" diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view.py index 03d588cd4..13142b2a4 100644 --- a/tests/functional/adapter/materialized_view_tests/test_materialized_view.py +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view.py @@ -3,26 +3,64 @@ import pytest from dbt.adapters.base.relation import BaseRelation -from dbt.tests.util import get_connection, run_dbt +from dbt.contracts.relation import RelationType from dbt.tests.adapter.materialized_view.basic import MaterializedViewBasic - from dbt.tests.adapter.materialized_view.files import MY_TABLE, MY_VIEW +from dbt.tests.util import ( + get_connection, + get_model_file, + run_dbt, + set_model_file, +) - -MY_MATERIALIZED_VIEW = """ -{{ config( - materialized='materialized_view' -) }} -select * from {{ ref('my_seed') }} -""" +from tests.functional.adapter.materialized_view_tests._files import ( + MY_BASE_TABLE, + MY_MATERIALIZED_VIEW, + MY_SEED, +) class TestBigqueryMaterializedViewsBasic(MaterializedViewBasic): + @pytest.fixture(scope="class") + def my_base_table(self, project) -> BaseRelation: + """ + The base table for a materialized view needs to be partitioned in + the same way as the materialized view. So if we want to create a partitioned + materialized view, we need to partition the base table. This table is a + select * on the seed table, plus a partition. 
+ """ + return project.adapter.Relation.create( + identifier="my_base_table", + schema=project.test_schema, + database=project.database, + type=RelationType.Table, + ) + + @pytest.fixture(scope="function", autouse=True) + def setup(self, project, my_base_table, my_materialized_view): # type: ignore + run_dbt(["seed"]) + run_dbt(["run", "--models", my_base_table.identifier, "--full-refresh"]) + run_dbt(["run", "--models", my_materialized_view.identifier, "--full-refresh"]) + + # the tests touch these files, store their contents in memory + initial_model = get_model_file(project, my_materialized_view) + + yield + + # and then reset them after the test runs + set_model_file(project, my_materialized_view, initial_model) + project.run_sql(f"drop schema if exists {project.test_schema} cascade") + + @pytest.fixture(scope="class", autouse=True) + def seeds(self): + return {"my_seed.csv": MY_SEED} + @pytest.fixture(scope="class", autouse=True) def models(self): yield { "my_table.sql": MY_TABLE, "my_view.sql": MY_VIEW, + "my_base_table.sql": MY_BASE_TABLE, "my_materialized_view.sql": MY_MATERIALIZED_VIEW, } @@ -65,13 +103,12 @@ def test_view_replaces_materialized_view(self, project, my_materialized_view): self.swap_materialized_view_to_view(project, my_materialized_view) - run_dbt( - ["run", "--models", my_materialized_view.identifier, "--full-refresh"] - ) # add --full-refresh + # add --full-refresh + run_dbt(["run", "--models", my_materialized_view.identifier, "--full-refresh"]) assert self.query_relation_type(project, my_materialized_view) == "view" @pytest.mark.skip( - "It appears BQ updates the materialized view almost immediately, which fails this test." + "It looks like BQ updates the materialized view almost immediately, which fails this test." 
) def test_materialized_view_only_updates_after_refresh( self, project, my_materialized_view, my_seed diff --git a/tests/functional/adapter/materialized_view_tests/utils.py b/tests/functional/adapter/materialized_view_tests/utils.py deleted file mode 100644 index 25e40d337..000000000 --- a/tests/functional/adapter/materialized_view_tests/utils.py +++ /dev/null @@ -1,13 +0,0 @@ -from dbt.adapters.bigquery.relation import BigQueryRelation - - -def query_autorefresh(project, relation: BigQueryRelation) -> bool: - sql = f""" - select - case mv.autorefresh when 't' then True when 'f' then False end as autorefresh - from stv_mv_info mv - where trim(mv.name) ilike '{ relation.identifier }' - and trim(mv.schema) ilike '{ relation.schema }' - and trim(mv.db_name) ilike '{ relation.database }' - """ - return project.run_sql(sql, fetch="one")[0] From d4634abfc64d171d1310739ac503dcf948a10178 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 01:12:03 -0400 Subject: [PATCH 36/53] remove include_policy from BigQueryRelation, it's causing unit tests to fail and is not used at the moment --- dbt/adapters/bigquery/relation.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index eed27b254..d753da1fd 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -1,12 +1,11 @@ from dataclasses import dataclass, field -from typing import Optional +from typing import FrozenSet, Optional from itertools import chain, islice from dbt.context.providers import RuntimeConfigObject from dbt.adapters.base.relation import BaseRelation, ComponentName, InformationSchema from dbt.adapters.relation_configs import RelationResults, RelationConfigChangeAction from dbt.adapters.bigquery.relation_configs import ( - BigQueryIncludePolicy, BigQueryQuotePolicy, BigQueryMaterializedViewConfig, BigQueryMaterializedViewConfigChangeset, @@ -28,11 +27,13 @@ class 
BigQueryRelation(BaseRelation): quote_character: str = "`" location: Optional[str] = None - include_policy: BigQueryIncludePolicy = field(default_factory=lambda: BigQueryIncludePolicy()) + # this is causing unit tests to fail + # include_policy: BigQueryIncludePolicy = field(default_factory=lambda: BigQueryIncludePolicy()) quote_policy: BigQueryQuotePolicy = field(default_factory=lambda: BigQueryQuotePolicy()) - # why do we need to use default_factory here but we can assign it directly in dbt-postgres? - renameable_relations = frozenset({RelationType.Table}) - replaceable_relations = frozenset({RelationType.Table, RelationType.View}) + renameable_relations: FrozenSet[RelationType] = frozenset({RelationType.Table}) + replaceable_relations: FrozenSet[RelationType] = frozenset( + {RelationType.Table, RelationType.View} + ) def matches( self, From f4f9cf532f46946812d6d3d03967bf7d1f7c312e Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 13:20:11 -0400 Subject: [PATCH 37/53] partition type cannot be queried for materialized views, adjust the describe query and equality check to account for that --- .../bigquery/relation_configs/partition.py | 16 ++++++++++ .../partition/describe.sql | 32 ++++++++++++++----- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index e773b9e2c..9c9714d7f 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -128,6 +128,22 @@ def parse_relation_results(cls, describe_relation_results: agate.Row) -> Dict[st return config_dict + def __eq__(self, other: Any) -> bool: + """ + We can't query partitions on materialized views, hence we are assuming that if the field and data type + have not changed, then the partition has not changed either. This should be updated to include the + granularity and range once that issue is resolved. 
Until then, users will need to supply --full-refresh + if they keep the field but change the partition granularity. + """ + if isinstance(other, PartitionConfig): + return all( + { + other.field == self.field, + other.data_type == self.data_type, + } + ) + return False + @dataclass(frozen=True, eq=True, unsafe_hash=True) class BigQueryPartitionConfigChange(RelationConfigChange): diff --git a/dbt/include/bigquery/macros/relation_components/partition/describe.sql b/dbt/include/bigquery/macros/relation_components/partition/describe.sql index 90019cbf4..460f777f6 100644 --- a/dbt/include/bigquery/macros/relation_components/partition/describe.sql +++ b/dbt/include/bigquery/macros/relation_components/partition/describe.sql @@ -1,4 +1,20 @@ {% macro bigquery__get_describe_partition_sql(relation) %} + with max_partition_id as ( + select + table_name, + table_schema, + table_catalog, + max(partition_id) as partition_id + from {{ relation.information_schema('PARTITIONS') }} + where table_name = '{{ relation.identifier }}' + and table_schema = '{{ relation.schema }}' + and table_catalog = '{{ relation.database }}' + group by + table_name, + table_schema, + table_catalog + ) + select c.column_name as partition_column_name, c.data_type as partition_data_type, @@ -8,14 +24,14 @@ when regexp_contains(p.partition_id, '^[0-9]{8}$') THEN 'day' when regexp_contains(p.partition_id, '^[0-9]{10}$') THEN 'hour' end as partition_type - from {{ relation.information_schema('PARTITIONS') }} p - join {{ relation.information_schema('COLUMNS') }} c - on c.table_name = p.table_name - and c.table_schema = p.table_schema - and c.table_catalog = p.table_catalog - where p.table_name = '{{ relation.identifier }}' - and p.table_schema = '{{ relation.schema }}' - and p.table_catalog = '{{ relation.database }}' + from {{ relation.information_schema('COLUMNS') }} c + left join max_partition_id p + on p.table_name = t.table_name + and p.table_schema = t.table_schema + and p.table_catalog = t.table_catalog 
+ where c.table_name = '{{ relation.identifier }}' + and c.table_schema = '{{ relation.schema }}' + and c.table_catalog = '{{ relation.database }}' and c.is_partitioning_column = 'YES' {% endmacro %} From ee3202676f6d08ba21ec4bb8ab5f9486acc88bd2 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 21:03:30 -0400 Subject: [PATCH 38/53] add describe_relation for materialized views --- dbt/adapters/bigquery/impl.py | 12 +++ .../partition/describe.sql | 6 +- .../relations/materialized_view/describe.sql | 2 - .../test_describe_relation.py | 76 +++++++++++++++++++ 4 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 tests/functional/adapter/describe_relation/test_describe_relation.py diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 3248cc26d..c5177e19a 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -783,6 +783,18 @@ def get_materialized_view_options( return opts + def describe_relation(self, relation: BigQueryRelation): + if relation.type == RelationType.MaterializedView: + macro = "bigquery__describe_materialized_view" + parser = BigQueryMaterializedViewConfig + else: + raise dbt.exceptions.DbtRuntimeError( + f"The method `BigQueryAdapter.describe_relation` is not implemented " + f"for the relation type: {relation.type}." 
+ ) + relation_results = self.execute_macro(macro, kwargs={"relation": relation}) + return parser.from_relation_results(relation_results) + @available.parse_none def grant_access_to(self, entity, entity_type, role, grant_target_dict): """ diff --git a/dbt/include/bigquery/macros/relation_components/partition/describe.sql b/dbt/include/bigquery/macros/relation_components/partition/describe.sql index 460f777f6..e8e205801 100644 --- a/dbt/include/bigquery/macros/relation_components/partition/describe.sql +++ b/dbt/include/bigquery/macros/relation_components/partition/describe.sql @@ -26,9 +26,9 @@ end as partition_type from {{ relation.information_schema('COLUMNS') }} c left join max_partition_id p - on p.table_name = t.table_name - and p.table_schema = t.table_schema - and p.table_catalog = t.table_catalog + on p.table_name = c.table_name + and p.table_schema = c.table_schema + and p.table_catalog = c.table_catalog where c.table_name = '{{ relation.identifier }}' and c.table_schema = '{{ relation.schema }}' and c.table_catalog = '{{ relation.database }}' diff --git a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql index 89e3f4006..231443cf8 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql @@ -11,13 +11,11 @@ {%- endset %} {% set _materialized_view = run_query(_materialized_view_sql) %} - {%- set _partition_by = bigquery__describe_partition(relation) -%} {%- set _cluster_by = bigquery__describe_cluster(relation) -%} {%- set _options = bigquery__describe_options(relation) -%} {% do return({ 'materialized_view': _materialized_view, - 'partition_by': _partition_by, 'cluster_by': _cluster_by, 'options': _options }) %} diff --git a/tests/functional/adapter/describe_relation/test_describe_relation.py b/tests/functional/adapter/describe_relation/test_describe_relation.py new 
file mode 100644 index 000000000..54b72fcd8 --- /dev/null +++ b/tests/functional/adapter/describe_relation/test_describe_relation.py @@ -0,0 +1,76 @@ +import pytest + +from dbt.adapters.base.relation import BaseRelation +from dbt.contracts.relation import RelationType +from dbt.tests.util import get_connection, run_dbt + +from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig +from tests.functional.adapter.describe_relation._files import ( + MY_BASE_TABLE, + MY_MATERIALIZED_VIEW, + MY_SEED, +) + + +class TestDescribeRelation: + @pytest.fixture(scope="class", autouse=True) + def seeds(self): + return {"my_seed.csv": MY_SEED} + + @pytest.fixture(scope="class", autouse=True) + def models(self): + yield { + "my_base_table.sql": MY_BASE_TABLE, + "my_materialized_view.sql": MY_MATERIALIZED_VIEW, + } + + @pytest.fixture(scope="class") + def my_seed(self, project) -> BaseRelation: + return project.adapter.Relation.create( + identifier="my_seed", + schema=project.test_schema, + database=project.database, + type=RelationType.Table, + ) + + @pytest.fixture(scope="class") + def my_base_table(self, project) -> BaseRelation: + """ + The base table for a materialized view needs to be partitioned in + the same way as the materialized view. So if we want to create a partitioned + materialized view, we need to partition the base table. This table is a + select * on the seed table, plus a partition. 
+ """ + return project.adapter.Relation.create( + identifier="my_base_table", + schema=project.test_schema, + database=project.database, + type=RelationType.Table, + ) + + @pytest.fixture(scope="class") + def my_materialized_view(self, project) -> BaseRelation: + return project.adapter.Relation.create( + identifier="my_materialized_view", + schema=project.test_schema, + database=project.database, + type=RelationType.MaterializedView, + ) + + @pytest.fixture(scope="class", autouse=True) + def setup(self, project, my_base_table, my_materialized_view): + run_dbt(["seed"]) + run_dbt(["run"]) + yield + project.run_sql(f"drop schema if exists {project.test_schema} cascade") + + def test_describe_materialized_view(self, project, my_materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(my_materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.materialized_view_name == f'"{my_materialized_view.identifier}"' + assert results.schema_name == f'"{my_materialized_view.schema}"' + assert results.database_name == f'"{my_materialized_view.database}"' + assert results.cluster.fields == frozenset({"id"}) + assert results.auto_refresh.enable_refresh is True + assert results.auto_refresh.refresh_interval_minutes == 30 From 32d9b894d592c35789deb3984f366a1c23b5108a Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 21:04:30 -0400 Subject: [PATCH 39/53] break out common utilities into a mixin for materialized view tests --- .../adapter/describe_relation/_files.py | 40 ++++++++ .../adapter/materialized_view_tests/_mixin.py | 94 +++++++++++++++++++ .../test_materialized_view.py | 93 +----------------- 3 files changed, 137 insertions(+), 90 deletions(-) create mode 100644 tests/functional/adapter/describe_relation/_files.py create mode 100644 tests/functional/adapter/materialized_view_tests/_mixin.py diff --git a/tests/functional/adapter/describe_relation/_files.py 
b/tests/functional/adapter/describe_relation/_files.py new file mode 100644 index 000000000..00979ca1e --- /dev/null +++ b/tests/functional/adapter/describe_relation/_files.py @@ -0,0 +1,40 @@ +MY_SEED = """ +id,value,record_date +1,100,2023-01-01 00:00:00 +2,200,2023-01-02 00:00:00 +3,300,2023-01-02 00:00:00 +""".strip() + +MY_BASE_TABLE = """ +{{ config( + materialized='table', + partition_by={ + "field": "record_date", + "data_type": "timestamp", + "granularity": "day" + }, + cluster_by=["id", "value"] +) }} +select + id, + value, + record_date +from {{ ref('my_seed') }} +""" + +MY_MATERIALIZED_VIEW = """ +{{ config( + materialized='materialized_view', + partition_by={ + "field": "record_date", + "data_type": "timestamp", + "granularity": "day" + }, + cluster_by="id", +) }} +select + id, + value, + record_date +from {{ ref('my_base_table') }} +""" diff --git a/tests/functional/adapter/materialized_view_tests/_mixin.py b/tests/functional/adapter/materialized_view_tests/_mixin.py new file mode 100644 index 000000000..5933a2dd2 --- /dev/null +++ b/tests/functional/adapter/materialized_view_tests/_mixin.py @@ -0,0 +1,94 @@ +from typing import Optional, Tuple + +import pytest + +from dbt.adapters.base.relation import BaseRelation +from dbt.contracts.relation import RelationType +from dbt.tests.adapter.materialized_view.files import MY_TABLE, MY_VIEW +from dbt.tests.util import ( + get_connection, + get_model_file, + run_dbt, + set_model_file, +) + +from tests.functional.adapter.materialized_view_tests._files import ( + MY_BASE_TABLE, + MY_MATERIALIZED_VIEW, + MY_SEED, +) + + +class BigQueryMaterializedViewMixin: + @pytest.fixture(scope="class") + def my_base_table(self, project) -> BaseRelation: + """ + The base table for a materialized view needs to be partitioned in + the same way as the materialized view. So if we want to create a partitioned + materialized view, we need to partition the base table. This table is a + select * on the seed table, plus a partition. 
+ """ + return project.adapter.Relation.create( + identifier="my_base_table", + schema=project.test_schema, + database=project.database, + type=RelationType.Table, + ) + + @pytest.fixture(scope="function", autouse=True) + def setup(self, project, my_base_table, my_materialized_view): # type: ignore + run_dbt(["seed"]) + run_dbt(["run", "--models", my_base_table.identifier, "--full-refresh"]) + run_dbt(["run", "--models", my_materialized_view.identifier, "--full-refresh"]) + + # the tests touch these files, store their contents in memory + initial_model = get_model_file(project, my_materialized_view) + + yield + + # and then reset them after the test runs + set_model_file(project, my_materialized_view, initial_model) + project.run_sql(f"drop schema if exists {project.test_schema} cascade") + + @pytest.fixture(scope="class", autouse=True) + def seeds(self): + return {"my_seed.csv": MY_SEED} + + @pytest.fixture(scope="class", autouse=True) + def models(self): + yield { + "my_table.sql": MY_TABLE, + "my_view.sql": MY_VIEW, + "my_base_table.sql": MY_BASE_TABLE, + "my_materialized_view.sql": MY_MATERIALIZED_VIEW, + } + + @staticmethod + def insert_record(project, table: BaseRelation, record: Tuple[int, int]) -> None: + my_id, value = record + project.run_sql(f"insert into {table} (id, value) values ({my_id}, {value})") + + @staticmethod + def refresh_materialized_view(project, materialized_view: BaseRelation) -> None: + sql = f""" + call bq.refresh_materialized_view( + '{materialized_view.database}.{materialized_view.schema}.{materialized_view.identifier}' + ) + """ + project.run_sql(sql) + + @staticmethod + def query_row_count(project, relation: BaseRelation) -> int: + sql = f"select count(*) from {relation}" + return project.run_sql(sql, fetch="one")[0] + + # look into syntax + @staticmethod + def query_relation_type(project, relation: BaseRelation) -> Optional[str]: + with get_connection(project.adapter) as conn: + table = conn.handle.get_table( + 
project.adapter.connections.get_bq_table( + relation.database, relation.schema, relation.identifier + ) + ) + return table.table_type.lower() diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view.py index 13142b2a4..7ca90983e 100644 --- a/tests/functional/adapter/materialized_view_tests/test_materialized_view.py +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view.py @@ -1,99 +1,12 @@ -from typing import Optional, Tuple - import pytest -from dbt.adapters.base.relation import BaseRelation -from dbt.contracts.relation import RelationType +from dbt.tests.util import run_dbt from dbt.tests.adapter.materialized_view.basic import MaterializedViewBasic -from dbt.tests.adapter.materialized_view.files import MY_TABLE, MY_VIEW -from dbt.tests.util import ( - get_connection, - get_model_file, - run_dbt, - set_model_file, -) - -from tests.functional.adapter.materialized_view_tests._files import ( - MY_BASE_TABLE, - MY_MATERIALIZED_VIEW, - MY_SEED, -) - - -class TestBigqueryMaterializedViewsBasic(MaterializedViewBasic): - @pytest.fixture(scope="class") - def my_base_table(self, project) -> BaseRelation: - """ - The base table for a materialized view needs to be partitioned in - the same way as the materialized view. So if we want to create a partitioned - materialized view, we need to partition the base table. This table is a - select * on the seed table, plus a partition. 
- """ - return project.adapter.Relation.create( - identifier="my_base_table", - schema=project.test_schema, - database=project.database, - type=RelationType.Table, - ) - - @pytest.fixture(scope="function", autouse=True) - def setup(self, project, my_base_table, my_materialized_view): # type: ignore - run_dbt(["seed"]) - run_dbt(["run", "--models", my_base_table.identifier, "--full-refresh"]) - run_dbt(["run", "--models", my_materialized_view.identifier, "--full-refresh"]) - - # the tests touch these files, store their contents in memory - initial_model = get_model_file(project, my_materialized_view) - - yield - - # and then reset them after the test runs - set_model_file(project, my_materialized_view, initial_model) - project.run_sql(f"drop schema if exists {project.test_schema} cascade") - - @pytest.fixture(scope="class", autouse=True) - def seeds(self): - return {"my_seed.csv": MY_SEED} - - @pytest.fixture(scope="class", autouse=True) - def models(self): - yield { - "my_table.sql": MY_TABLE, - "my_view.sql": MY_VIEW, - "my_base_table.sql": MY_BASE_TABLE, - "my_materialized_view.sql": MY_MATERIALIZED_VIEW, - } - - @staticmethod - def insert_record(project, table: BaseRelation, record: Tuple[int, int]): - my_id, value = record - project.run_sql(f"insert into {table} (id, value) values ({my_id}, {value})") - - @staticmethod - def refresh_materialized_view(project, materialized_view: BaseRelation): - sql = f""" - call bq.refresh_materialized_view( - '{materialized_view.database}.{materialized_view.schema}.{materialized_view.identifier}' - ) - """ - project.run_sql(sql) - @staticmethod - def query_row_count(project, relation: BaseRelation) -> int: - sql = f"select count(*) from {relation}" - return project.run_sql(sql, fetch="one")[0] +from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin - # look into syntax - @staticmethod - def query_relation_type(project, relation: BaseRelation) -> Optional[str]: - with 
get_connection(project.adapter) as conn: - table = conn.handle.get_table( - project.adapter.connections.get_bq_table( - relation.database, relation.schema, relation.identifier - ) - ) - return table.table_type.lower() +class TestBigqueryMaterializedViewsBasic(BigQueryMaterializedViewMixin, MaterializedViewBasic): def test_view_replaces_materialized_view(self, project, my_materialized_view): """ We don't support replacing a view with another object in dbt-bigquery unless we use --full-refresh From a9a458119794064ac7189a3d7dfffe28ee459ede Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 21:05:20 -0400 Subject: [PATCH 40/53] change refresh_interval_minutes from an int to a float to match the bigquery docs --- .../bigquery/relation_configs/auto_refresh.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/auto_refresh.py b/dbt/adapters/bigquery/relation_configs/auto_refresh.py index d3b1111b5..360f575ee 100644 --- a/dbt/adapters/bigquery/relation_configs/auto_refresh.py +++ b/dbt/adapters/bigquery/relation_configs/auto_refresh.py @@ -24,7 +24,7 @@ class BigQueryAutoRefreshConfig(BigQueryRelationConfigBase): """ enable_refresh: Optional[bool] = True - refresh_interval_minutes: Optional[int] = 30 + refresh_interval_minutes: Optional[float] = 30 max_staleness: Optional[str] = None @classmethod @@ -39,6 +39,10 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryAutoRefreshConfig": if max_staleness := config_dict.get("max_staleness"): kwargs_dict.update({"max_staleness": max_staleness}) + # avoid picking up defaults (e.g. 
refresh_interval_minutes = 30) when the user turns off refresh + if kwargs_dict.get("enable_refresh", True) is False: + kwargs_dict.update({"refresh-interval_minutes": None, "max_staleness": None}) + auto_refresh: "BigQueryAutoRefreshConfig" = super().from_dict(kwargs_dict) # type: ignore return auto_refresh @@ -67,9 +71,13 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, } config_dict = { "enable_refresh": bool_setting(options.get("enable_refresh")), - "refresh_interval_minutes": options.get("refresh_interval_minutes"), "max_staleness": options.get("max_staleness"), } + + if refresh_interval_minutes := options.get("refresh_interval_minutes"): + # refresh_interval_minutes + config_dict.update({"refresh_interval_minutes": float(refresh_interval_minutes)}) + return config_dict From b84beef7272a9a4b0621b8590fd11377bea4d8b2 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 21:06:28 -0400 Subject: [PATCH 41/53] make partition optional on relation results since it cannot be queried yet --- .../bigquery/relation_configs/materialized_view.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index 88f1b0f3e..39ead4b15 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -150,11 +150,11 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, } # optional - partition_by: agate.Table = relation_results.get("partition_by") # type: ignore - if len(partition_by) > 0: - config_dict.update( - {"partition": PartitionConfig.parse_relation_results(partition_by[0])} - ) + if partition_by := relation_results.get("partition_by"): + if len(partition_by) > 0: + config_dict.update( + {"partition": PartitionConfig.parse_relation_results(partition_by[0])} + ) cluster_by: agate.Table = 
relation_results.get("cluster_by") # type: ignore if len(cluster_by) > 0: From 0b609f2391a1d8ffecff1519a0a250e2f91d608b Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 21:07:00 -0400 Subject: [PATCH 42/53] initial draft of materialized view change tests --- .../test_materialized_view_changes.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py new file mode 100644 index 000000000..444914bcd --- /dev/null +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py @@ -0,0 +1,69 @@ +from dbt.tests.adapter.materialized_view.changes import ( + MaterializedViewChanges, + MaterializedViewChangesApplyMixin, + MaterializedViewChangesContinueMixin, + MaterializedViewChangesFailMixin, +) +from dbt.tests.util import get_connection, get_model_file, set_model_file + +from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig + +from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin + + +class BigQueryMaterializedViewChanges(BigQueryMaterializedViewMixin, MaterializedViewChanges): + @staticmethod + def check_start_state(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.auto_refresh.enable_refresh is True + assert results.auto_refresh.refresh_interval_minutes == 60 + assert results.auto_refresh.max_staleness == "0-0 0 0:45:0" + assert results.cluster.fields == frozenset({"id", "value"}) + + @staticmethod + def change_config_via_alter(project, materialized_view): + initial_model = get_model_file(project, materialized_view) + 
new_model = initial_model.replace("enable_refresh=True", "enable_refresh=False") + set_model_file(project, materialized_view, new_model) + + @staticmethod + def check_state_alter_change_is_applied(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.auto_refresh.enable_refresh is False + assert results.auto_refresh.refresh_interval_minutes is None + assert results.auto_refresh.max_staleness is None + + @staticmethod + def change_config_via_replace(project, materialized_view): + initial_model = get_model_file(project, materialized_view) + new_model = initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') + set_model_file(project, materialized_view, new_model) + + @staticmethod + def check_state_replace_change_is_applied(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.cluster.fields == frozenset({"id"}) + + +class TestBigQueryMaterializedViewChangesApply( + BigQueryMaterializedViewChanges, MaterializedViewChangesApplyMixin +): + pass + + +class TestBigQueryMaterializedViewChangesContinue( + BigQueryMaterializedViewChanges, MaterializedViewChangesContinueMixin +): + pass + + +class TestBigQueryMaterializedViewChangesFail( + BigQueryMaterializedViewChanges, MaterializedViewChangesFailMixin +): + pass From 779504b61cc0ca935a03fabb81dda7017d4872f5 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Thu, 5 Oct 2023 21:11:04 -0400 Subject: [PATCH 43/53] build changeset for materialized view --- .../bigquery/relation_configs/materialized_view.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py 
index 39ead4b15..ba25a7466 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -13,10 +13,7 @@ BigQueryAutoRefreshConfig, BigQueryAutoRefreshConfigChange, ) -from dbt.adapters.bigquery.relation_configs.partition import ( - PartitionConfig, - BigQueryPartitionConfigChange, -) +from dbt.adapters.bigquery.relation_configs.partition import PartitionConfig from dbt.adapters.bigquery.relation_configs.cluster import ( BigQueryClusterConfig, BigQueryClusterConfigChange, @@ -177,7 +174,6 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, @dataclass class BigQueryMaterializedViewConfigChangeset: auto_refresh: Optional[BigQueryAutoRefreshConfigChange] = None - partition: Optional[BigQueryPartitionConfigChange] = None cluster: Optional[BigQueryClusterConfigChange] = None expiration_timestamp: Optional[datetime] = None kms_key_name: Optional[str] = None @@ -189,7 +185,6 @@ def requires_full_refresh(self) -> bool: return any( { self.auto_refresh.requires_full_refresh if self.auto_refresh else False, - self.partition.requires_full_refresh if self.partition else False, self.cluster.requires_full_refresh if self.cluster else False, } ) @@ -198,8 +193,11 @@ def requires_full_refresh(self) -> bool: def has_changes(self) -> bool: return any( { - self.partition if self.partition else False, self.cluster if self.cluster else False, self.auto_refresh if self.auto_refresh else False, + self.expiration_timestamp if self.expiration_timestamp else False, + self.kms_key_name if self.kms_key_name else False, + self.labels if self.labels else False, + self.description if self.description else False, } ) From 876dad8779f9d84a5754f3e2ddd540ab3c3fee14 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Tue, 10 Oct 2023 01:25:34 -0400 Subject: [PATCH 44/53] implement change monitoring for autorefresh and clustering on materialized views, add describe_relation method on 
BigQueryAdapter for utility --- dbt/adapters/bigquery/impl.py | 46 +----- dbt/adapters/bigquery/relation.py | 27 +-- .../bigquery/relation_configs/__init__.py | 8 +- .../bigquery/relation_configs/auto_refresh.py | 90 ---------- .../relation_configs/materialized_view.py | 97 ++--------- .../bigquery/relation_configs/options.py | 154 ++++++++++++++++++ dbt/adapters/bigquery/utility.py | 34 +++- .../relations/materialized_view/alter.sql | 8 +- .../relations/materialized_view/create.sql | 2 +- .../relations/materialized_view/replace.sql | 2 +- .../adapter/describe_relation/_files.py | 20 +++ .../test_describe_relation.py | 26 ++- .../test_materialized_view_changes.py | 15 +- 13 files changed, 271 insertions(+), 258 deletions(-) delete mode 100644 dbt/adapters/bigquery/relation_configs/auto_refresh.py create mode 100644 dbt/adapters/bigquery/relation_configs/options.py diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index c5177e19a..cc0a6c5f3 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -44,7 +44,11 @@ ServerlessDataProcHelper, ) from dbt.adapters.bigquery.relation import BigQueryRelation -from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig, PartitionConfig +from dbt.adapters.bigquery.relation_configs import ( + BigQueryMaterializedViewConfig, + PartitionConfig, +) +from dbt.adapters.bigquery.utility import sql_escape logger = AdapterLogger("BigQuery") @@ -57,12 +61,6 @@ _dataset_lock = threading.Lock() -def sql_escape(string): - if not isinstance(string, str): - raise dbt.exceptions.CompilationError(f"cannot escape a non-string: {string}") - return json.dumps(string)[1:-1] - - @dataclass class GrantTarget(dbtClassMixin): dataset: str @@ -751,38 +749,6 @@ def get_view_options(self, config: Dict[str, Any], node: Dict[str, Any]) -> Dict opts = self.get_common_options(config, node) return opts - @available.parse(lambda *a, **k: {}) - def get_materialized_view_options( - self, - 
materialized_view: BigQueryMaterializedViewConfig, - ) -> Dict[str, Any]: - opts: Dict[str, Any] = {} - - if expiration_timestamp := materialized_view.expiration_timestamp: - opts.update({"expiration_timestamp": expiration_timestamp}) - - if description := materialized_view.description: - escaped_description = sql_escape(description) - opts.update({"description": f'"""{escaped_description}"""'}) - - if labels := materialized_view.labels: - opts.update({"labels": list(labels.items())}) - - if kms_key_name := materialized_view.kms_key_name: - opts.update({"kms_key_name": f"'{kms_key_name}'"}) - - if auto_refresh := materialized_view.auto_refresh: - opts.update( - { - "enable_refresh": auto_refresh.enable_refresh, - "refresh_interval_minutes": auto_refresh.refresh_interval_minutes, - } - ) - if max_staleness := auto_refresh.max_staleness: - opts.update({"max_staleness": max_staleness}) - - return opts - def describe_relation(self, relation: BigQueryRelation): if relation.type == RelationType.MaterializedView: macro = "bigquery__describe_materialized_view" @@ -790,7 +756,7 @@ def describe_relation(self, relation: BigQueryRelation): else: raise dbt.exceptions.DbtRuntimeError( f"The method `BigQueryAdapter.describe_relation` is not implemented " - f"for the relation type: {relation.type}." 
+ f"for the relation type: {relation.type}" ) relation_results = self.execute_macro(macro, kwargs={"relation": relation}) return parser.from_relation_results(relation_results) diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index d753da1fd..184e133a1 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -6,12 +6,11 @@ from dbt.adapters.base.relation import BaseRelation, ComponentName, InformationSchema from dbt.adapters.relation_configs import RelationResults, RelationConfigChangeAction from dbt.adapters.bigquery.relation_configs import ( - BigQueryQuotePolicy, + BigQueryClusterConfigChange, BigQueryMaterializedViewConfig, BigQueryMaterializedViewConfigChangeset, - BigQueryAutoRefreshConfigChange, - BigQueryClusterConfigChange, - BigQueryPartitionConfigChange, + BigQueryOptionsConfigChange, + BigQueryQuotePolicy, ) from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import RelationType @@ -71,7 +70,7 @@ def dataset(self): def materialized_view_from_model_node( cls, model_node: ModelNode ) -> BigQueryMaterializedViewConfig: - return BigQueryMaterializedViewConfig.from_model_node(model_node) + return BigQueryMaterializedViewConfig.from_model_node(model_node) # type: ignore @classmethod def materialized_view_config_changeset( @@ -85,13 +84,10 @@ def materialized_view_config_changeset( assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) assert isinstance(new_materialized_view, BigQueryMaterializedViewConfig) - if ( - new_materialized_view.auto_refresh != existing_materialized_view.auto_refresh - and new_materialized_view.auto_refresh - ): - config_change_collection.auto_refresh = BigQueryAutoRefreshConfigChange( + if new_materialized_view.options != existing_materialized_view.options: + config_change_collection.options = BigQueryOptionsConfigChange( action=RelationConfigChangeAction.alter, - context=new_materialized_view.auto_refresh, + 
context=new_materialized_view.options, ) if ( @@ -103,15 +99,6 @@ def materialized_view_config_changeset( context=new_materialized_view.cluster, ) - if ( - new_materialized_view.partition != existing_materialized_view.partition - and new_materialized_view.partition - ): - config_change_collection.partition = BigQueryPartitionConfigChange( - action=RelationConfigChangeAction.alter, - context=new_materialized_view.partition, - ) - if config_change_collection: return config_change_collection return None diff --git a/dbt/adapters/bigquery/relation_configs/__init__.py b/dbt/adapters/bigquery/relation_configs/__init__.py index e50246bbf..af1a0a9b0 100644 --- a/dbt/adapters/bigquery/relation_configs/__init__.py +++ b/dbt/adapters/bigquery/relation_configs/__init__.py @@ -1,7 +1,3 @@ -from dbt.adapters.bigquery.relation_configs.auto_refresh import ( - BigQueryAutoRefreshConfig, - BigQueryAutoRefreshConfigChange, -) from dbt.adapters.bigquery.relation_configs.cluster import ( BigQueryClusterConfig, BigQueryClusterConfigChange, @@ -10,6 +6,10 @@ BigQueryMaterializedViewConfig, BigQueryMaterializedViewConfigChangeset, ) +from dbt.adapters.bigquery.relation_configs.options import ( + BigQueryOptionsConfig, + BigQueryOptionsConfigChange, +) from dbt.adapters.bigquery.relation_configs.partition import ( PartitionConfig, BigQueryPartitionConfigChange, diff --git a/dbt/adapters/bigquery/relation_configs/auto_refresh.py b/dbt/adapters/bigquery/relation_configs/auto_refresh.py deleted file mode 100644 index 360f575ee..000000000 --- a/dbt/adapters/bigquery/relation_configs/auto_refresh.py +++ /dev/null @@ -1,90 +0,0 @@ -from dataclasses import dataclass -from typing import Any, Dict, Optional - -import agate -from dbt.adapters.relation_configs import RelationConfigChange, RelationResults -from dbt.contracts.graph.nodes import ModelNode - -from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase -from dbt.adapters.bigquery.utility import bool_setting - - 
-@dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryAutoRefreshConfig(BigQueryRelationConfigBase): - """ - This config manages materialized view options supporting automatic refresh. See the following for more information: - - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#materialized_view_option_list - - https://cloud.google.com/bigquery/docs/materialized-views-create#manage_staleness_and_refresh_frequency - - - enable_refresh: enables automatic refresh based on `refresh_interval_minutes` - - refresh_interval_minutes: frequency at which a materialized view will be refreshed - - max_staleness: if the last refresh is within the max_staleness interval, - BigQuery returns data directly from the materialized view (faster/cheaper) without reading the base table, - otherwise it reads from the base table (slower/more expensive) to meet the staleness requirement - """ - - enable_refresh: Optional[bool] = True - refresh_interval_minutes: Optional[float] = 30 - max_staleness: Optional[str] = None - - @classmethod - def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryAutoRefreshConfig": - kwargs_dict = {} - - # optional - if "enable_refresh" in config_dict: # boolean - kwargs_dict.update({"enable_refresh": config_dict.get("enable_refresh")}) - if refresh_interval_minutes := config_dict.get("refresh_interval_minutes"): - kwargs_dict.update({"refresh_interval_minutes": refresh_interval_minutes}) - if max_staleness := config_dict.get("max_staleness"): - kwargs_dict.update({"max_staleness": max_staleness}) - - # avoid picking up defaults (e.g. 
refresh_interval_minutes = 30) when the user turns off refresh - if kwargs_dict.get("enable_refresh", True) is False: - kwargs_dict.update({"refresh-interval_minutes": None, "max_staleness": None}) - - auto_refresh: "BigQueryAutoRefreshConfig" = super().from_dict(kwargs_dict) # type: ignore - return auto_refresh - - @classmethod - def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: - config_dict = {} - - # check for the key since this is a boolean - if "enable_refresh" in model_node.config.extra: - enable_refresh = model_node.config.extra.get("enable_refresh") - config_dict.update({"enable_refresh": bool_setting(enable_refresh)}) - - if refresh_interval_minutes := model_node.config.extra.get("refresh_interval_minutes"): - config_dict.update({"refresh_interval_minutes": refresh_interval_minutes}) - - if max_staleness := model_node.config.extra.get("max_staleness"): - config_dict.update({"max_staleness": max_staleness}) - - return config_dict - - @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - options_config: agate.Table = relation_results.get("options") # type: ignore - options = { - option.get("option_name"): option.get("option_value") for option in options_config - } - config_dict = { - "enable_refresh": bool_setting(options.get("enable_refresh")), - "max_staleness": options.get("max_staleness"), - } - - if refresh_interval_minutes := options.get("refresh_interval_minutes"): - # refresh_interval_minutes - config_dict.update({"refresh_interval_minutes": float(refresh_interval_minutes)}) - - return config_dict - - -@dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryAutoRefreshConfigChange(RelationConfigChange): - context: BigQueryAutoRefreshConfig - - @property - def requires_full_refresh(self) -> bool: - return False diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index ba25a7466..f3e3914f9 
100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,17 +1,15 @@ from dataclasses import dataclass -from datetime import datetime, timedelta from typing import Any, Dict, Optional import agate from dbt.adapters.relation_configs import RelationResults from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import ComponentName -from dbt.exceptions import DbtRuntimeError from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase -from dbt.adapters.bigquery.relation_configs.auto_refresh import ( - BigQueryAutoRefreshConfig, - BigQueryAutoRefreshConfigChange, +from dbt.adapters.bigquery.relation_configs.options import ( + BigQueryOptionsConfig, + BigQueryOptionsConfigChange, ) from dbt.adapters.bigquery.relation_configs.partition import PartitionConfig from dbt.adapters.bigquery.relation_configs.cluster import ( @@ -28,27 +26,19 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): The following parameters are configurable by dbt: - materialized_view_name: name of the materialized view - - schema: dataset name of the materialized view - - database: project name of the database - - auto_refresh: object containing refresh scheduling information + - schema_name: dataset name of the materialized view + - database_name: project name of the database + - options: options that get set in `SET OPTIONS()` clause - partition: object containing partition information - cluster: object containing cluster information - - expiration_timestamp: the time when table expires - - kms_key_name: user defined Cloud KMS encryption key - - labels: used to organized and group objects - - description: user description for materialized view """ materialized_view_name: str schema_name: str database_name: str - auto_refresh: BigQueryAutoRefreshConfig + options: BigQueryOptionsConfig partition: Optional[PartitionConfig] = None cluster: 
Optional[BigQueryClusterConfig] = None - expiration_timestamp: Optional[datetime] = None - kms_key_name: Optional[str] = None - labels: Optional[Dict[str, str]] = None - description: Optional[str] = None @classmethod def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConfig": @@ -61,7 +51,7 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf "database_name": cls._render_part( ComponentName.Database, config_dict["database_name"] ), - "auto_refresh": BigQueryAutoRefreshConfig.from_dict(config_dict["auto_refresh"]), + "options": BigQueryOptionsConfig.from_dict(config_dict["options"]), } # optional @@ -71,39 +61,17 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf if cluster := config_dict.get("cluster"): kwargs_dict.update({"cluster": BigQueryClusterConfig.from_dict(cluster)}) - optional_attributes = [ - "expiration_timestamp", - "kms_key_name", - "labels", - "description", - ] - optional_attributes_set_by_user = { - k: v for k, v in config_dict.items() if k in optional_attributes - } - kwargs_dict.update(optional_attributes_set_by_user) - materialized_view: "BigQueryMaterializedViewConfig" = super().from_dict(kwargs_dict) # type: ignore return materialized_view - @classmethod - def from_model_node(cls, model_node: ModelNode) -> "BigQueryMaterializedViewConfig": - materialized_view = super().from_model_node(model_node) - if isinstance(materialized_view, BigQueryMaterializedViewConfig): - return materialized_view - else: - raise DbtRuntimeError( - f"An unexpected error occurred in BigQueryMaterializedViewConfig.from_model_node:\n" - f" Expected: BigQueryMaterializedViewConfig\n" - f" Actual: {materialized_view}" - ) - @classmethod def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = { "materialized_view_name": model_node.identifier, "schema_name": model_node.schema, "database_name": model_node.database, - "auto_refresh": 
BigQueryAutoRefreshConfig.parse_model_node(model_node), + # despite this being a foreign object, there will always be options because of defaults + "options": BigQueryOptionsConfig.parse_model_node(model_node), } # optional @@ -113,37 +81,19 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: if "cluster_by" in model_node.config: config_dict.update({"cluster": BigQueryClusterConfig.parse_model_node(model_node)}) - if hours_to_expiration := model_node.config.extra.get("hours_to_expiration"): - config_dict.update( - {"expiration_timestamp": datetime.now() + timedelta(hours=hours_to_expiration)} - ) - - if kms_key_name := model_node.config.extra.get("kms_key_name"): - config_dict.update({"kms_key_name": kms_key_name}) - - if labels := model_node.config.extra.get("labels"): - config_dict.update({"labels": labels}) - - if description := model_node.config.extra.get("description"): - if model_node.config.persist_docs: - config_dict.update({"description": description}) - return config_dict @classmethod def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: materialized_view_config: agate.Table = relation_results.get("materialized_view") # type: ignore materialized_view: agate.Row = cls._get_first_row(materialized_view_config) - options_config: agate.Table = relation_results.get("options") # type: ignore - options = { - option.get("option_name"): option.get("option_value") for option in options_config.rows - } config_dict = { "materialized_view_name": materialized_view.get("table_name"), "schema_name": materialized_view.get("table_schema"), "database_name": materialized_view.get("table_catalog"), - "auto_refresh": BigQueryAutoRefreshConfig.parse_relation_results(relation_results), + # despite this being a foreign object, there will always be options because of defaults + "options": BigQueryOptionsConfig.parse_relation_results(relation_results), } # optional @@ -159,32 +109,19 @@ def parse_relation_results(cls, 
relation_results: RelationResults) -> Dict[str, {"cluster": BigQueryClusterConfig.parse_relation_results(cluster_by)} ) - config_dict.update( - { - "expiration_timestamp": options.get("expiration_timestamp"), - "kms_key_name": options.get("kms_key_name"), - "labels": options.get("labels"), - "description": options.get("description"), - } - ) - return config_dict @dataclass class BigQueryMaterializedViewConfigChangeset: - auto_refresh: Optional[BigQueryAutoRefreshConfigChange] = None + options: Optional[BigQueryOptionsConfigChange] = None cluster: Optional[BigQueryClusterConfigChange] = None - expiration_timestamp: Optional[datetime] = None - kms_key_name: Optional[str] = None - labels: Optional[Dict[str, str]] = None - description: Optional[str] = None @property def requires_full_refresh(self) -> bool: return any( { - self.auto_refresh.requires_full_refresh if self.auto_refresh else False, + self.options.requires_full_refresh if self.options else False, self.cluster.requires_full_refresh if self.cluster else False, } ) @@ -193,11 +130,7 @@ def requires_full_refresh(self) -> bool: def has_changes(self) -> bool: return any( { + self.options if self.options else False, self.cluster if self.cluster else False, - self.auto_refresh if self.auto_refresh else False, - self.expiration_timestamp if self.expiration_timestamp else False, - self.kms_key_name if self.kms_key_name else False, - self.labels if self.labels else False, - self.description if self.description else False, } ) diff --git a/dbt/adapters/bigquery/relation_configs/options.py b/dbt/adapters/bigquery/relation_configs/options.py new file mode 100644 index 000000000..a84d7dd19 --- /dev/null +++ b/dbt/adapters/bigquery/relation_configs/options.py @@ -0,0 +1,154 @@ +from dataclasses import dataclass +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional + +import agate +from dbt.adapters.relation_configs import RelationConfigChange, RelationResults +from dbt.contracts.graph.nodes 
import ModelNode + +from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase +from dbt.adapters.bigquery.utility import bool_setting, float_setting, sql_escape + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryOptionsConfig(BigQueryRelationConfigBase): + """ + This config manages materialized view options. See the following for more information: + - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#materialized_view_option_list + """ + + enable_refresh: Optional[bool] = True + refresh_interval_minutes: Optional[float] = 30 + expiration_timestamp: Optional[datetime] = None + max_staleness: Optional[str] = None + kms_key_name: Optional[str] = None + description: Optional[str] = None + labels: Optional[Dict[str, str]] = None + + @classmethod + def user_configurable_options(cls) -> List[str]: + return [ + "enable_refresh", + "refresh_interval_minutes", + "expiration_timestamp", + "max_staleness", + "kms_key_name", + "description", + "labels", + ] + + def as_ddl_dict(self) -> Dict[str, Any]: + """ + Reformat `options_dict` so that it can be passed into the `bigquery_options()` macro. + + Options should be flattened and filtered prior to passing into this method. 
For example: + - the "auto refresh" set of options should be flattened into the root instead of stuck under "auto_refresh" + - any option that comes in set as `None` will be unset; this happens mostly due to config changes + """ + + def boolean(x): + return x + + def numeric(x): + return x + + def string(x): + return f"'{x}'" + + def escaped_string(x): + return f'"""{sql_escape(x)}"""' + + def interval(x): + return x + + def array(x): + return list(x.items()) + + option_formatters = { + "enable_refresh": boolean, + "refresh_interval_minutes": numeric, + "expiration_timestamp": interval, + "max_staleness": interval, + "kms_key_name": string, + "description": escaped_string, + "labels": array, + } + + def formatted_option(name: str) -> Optional[Any]: + value = getattr(self, name) + if value is not None: + formatter = option_formatters[name] + return formatter(value) + return None + + options = { + option: formatted_option(option) + for option, option_formatter in option_formatters.items() + if formatted_option(option) is not None + } + + return options + + @classmethod + def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryOptionsConfig": + setting_formatters = { + "enable_refresh": bool_setting, + "refresh_interval_minutes": float_setting, + "expiration_timestamp": None, + "max_staleness": None, + "kms_key_name": None, + "description": None, + "labels": None, + } + + def formatted_setting(name: str) -> Any: + value = config_dict.get(name) + if formatter := setting_formatters[name]: + return formatter(value) + return value + + kwargs_dict = {attribute: formatted_setting(attribute) for attribute in setting_formatters} + + # avoid picking up defaults on dependent options + # e.g. 
don't set `refresh_interval_minutes` = 30 when the user has `enable_refresh` = False + if kwargs_dict["enable_refresh"] is False: + kwargs_dict.update({"refresh_interval_minutes": None, "max_staleness": None}) + + options: "BigQueryOptionsConfig" = super().from_dict(kwargs_dict) # type: ignore + return options + + @classmethod + def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: + config_dict = { + option: model_node.config.extra.get(option) + for option in cls.user_configurable_options() + } + + # update dbt-specific versions of these settings + if hours_to_expiration := model_node.config.extra.get("hours_to_expiration"): + config_dict.update( + {"expiration_timestamp": datetime.now() + timedelta(hours=hours_to_expiration)} + ) + if not model_node.config.persist_docs: + del config_dict["description"] + + return config_dict + + @classmethod + def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: + options_config: agate.Table = relation_results.get("options") # type: ignore + config_dict = { + option.get("option_name"): option.get("option_value") + for option in options_config + if option.get("option_name") in cls.user_configurable_options() + } + return config_dict + + +@dataclass(frozen=True, eq=True, unsafe_hash=True) +class BigQueryOptionsConfigChange(RelationConfigChange): + context: BigQueryOptionsConfig + + @property + def requires_full_refresh(self) -> bool: + return False diff --git a/dbt/adapters/bigquery/utility.py b/dbt/adapters/bigquery/utility.py index dd7c3bb8e..c66b2d97d 100644 --- a/dbt/adapters/bigquery/utility.py +++ b/dbt/adapters/bigquery/utility.py @@ -1,5 +1,8 @@ +import json from typing import Any, Optional +import dbt.exceptions + def bool_setting(value: Optional[Any] = None) -> Optional[bool]: if value is None: @@ -7,15 +10,36 @@ def bool_setting(value: Optional[Any] = None) -> Optional[bool]: elif isinstance(value, bool): return value elif isinstance(value, str): - if value.lower() in 
["true", "false"]: - return bool(value) + # don't do bool(value) as that is equivalent to: len(value) > 0 + if value.lower() == "true": + return True + elif value.lower() == "false": + return False else: raise ValueError( f"Invalid input, " - f"expecting bool or str ex. (True, False, 'true', 'False'), recieved: {value}" + f"expecting `bool` or `str` ex. (True, False, 'true', 'False'), received: {value}" ) else: raise TypeError( - f"Invalide type for bool evaluation, " - f"expecting bool or str, recieved: {type(value)}" + f"Invalid type for bool evaluation, " + f"expecting `bool` or `str`, received: {type(value)}" + ) + + +def float_setting(value: Optional[Any] = None) -> Optional[float]: + if value is None: + return None + elif any(isinstance(value, i) for i in [int, float, str]): + return float(value) + else: + raise TypeError( + f"Invalid type for float evaluation, " + f"expecting `int`, `float`, or `str`, received: {type(value)}" ) + + +def sql_escape(string): + if not isinstance(string, str): + raise dbt.exceptions.CompilationError(f"cannot escape a non-string: {string}") + return json.dumps(string)[1:-1] diff --git a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql index 00f43073b..b0381a7bf 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql @@ -11,17 +11,11 @@ {{ get_replace_sql(existing_relation, relation, sql) }} {% else %} - {%- set auto_refresh = configuration_changes.auto_refresh -%} - {%- if auto_refresh -%}{{- log('Applying UPDATE AUTOREFRESH to: ' ~ relation) -}}{%- endif -%} - alter materialized view {{ relation }} - set options ( - {% if auto_refresh %} enable_refresh = {{ auto_refresh.context }}{% endif %} - ) + set {{ bigquery_options(configuration_changes.options.context.as_ddl_dict()) }} {%- endif %} - {% endmacro %} {% macro 
bigquery__get_materialized_view_configuration_changes(existing_relation, new_config) %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/create.sql b/dbt/include/bigquery/macros/relations/materialized_view/create.sql index c8ccf0676..d2b6c5be5 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/create.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/create.sql @@ -5,7 +5,7 @@ create materialized view if not exists {{ relation }} {% if materialized_view.partition %}{{ partition_by(materialized_view.partition) }}{% endif %} {% if materialized_view.cluster %}{{ cluster_by(materialized_view.cluster.fields) }}{% endif %} - {{ bigquery_options(adapter.get_materialized_view_options(materialized_view)) }} + {{ bigquery_options(materialized_view.options.as_ddl_dict()) }} as {{ sql }} {% endmacro %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/replace.sql b/dbt/include/bigquery/macros/relations/materialized_view/replace.sql index ac76b5c9f..4fdfd21f6 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/replace.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/replace.sql @@ -5,7 +5,7 @@ create or replace materialized view if not exists {{ relation }} {% if materialized_view.partition %}{{ partition_by(materialized_view.partition) }}{% endif %} {% if materialized_view.cluster %}{{ cluster_by(materialized_view.cluster.fields) }}{% endif %} - {{ bigquery_options(adapter.get_materialized_view_options(materialized_view)) }} + {{ bigquery_options(materialized_view.options.as_ddl_dict()) }} as {{ sql }} {% endmacro %} diff --git a/tests/functional/adapter/describe_relation/_files.py b/tests/functional/adapter/describe_relation/_files.py index 00979ca1e..e5d330500 100644 --- a/tests/functional/adapter/describe_relation/_files.py +++ b/tests/functional/adapter/describe_relation/_files.py @@ -38,3 +38,23 @@ record_date from {{ ref('my_base_table') }} """ + + 
+MY_OTHER_MATERIALIZED_VIEW = """ +{{ config( + materialized='materialized_view', + partition_by={ + "field": "record_date", + "data_type": "timestamp", + "granularity": "day" + }, + cluster_by="id", + enable_refresh=False, + refresh_interval_minutes=60 +) }} +select + id, + value, + record_date +from {{ ref('my_base_table') }} +""" diff --git a/tests/functional/adapter/describe_relation/test_describe_relation.py b/tests/functional/adapter/describe_relation/test_describe_relation.py index 54b72fcd8..4d6c77cca 100644 --- a/tests/functional/adapter/describe_relation/test_describe_relation.py +++ b/tests/functional/adapter/describe_relation/test_describe_relation.py @@ -8,6 +8,7 @@ from tests.functional.adapter.describe_relation._files import ( MY_BASE_TABLE, MY_MATERIALIZED_VIEW, + MY_OTHER_MATERIALIZED_VIEW, MY_SEED, ) @@ -22,6 +23,7 @@ def models(self): yield { "my_base_table.sql": MY_BASE_TABLE, "my_materialized_view.sql": MY_MATERIALIZED_VIEW, + "my_other_materialized_view.sql": MY_OTHER_MATERIALIZED_VIEW, } @pytest.fixture(scope="class") @@ -57,6 +59,15 @@ def my_materialized_view(self, project) -> BaseRelation: type=RelationType.MaterializedView, ) + @pytest.fixture(scope="class") + def my_other_materialized_view(self, project) -> BaseRelation: + return project.adapter.Relation.create( + identifier="my_other_materialized_view", + schema=project.test_schema, + database=project.database, + type=RelationType.MaterializedView, + ) + @pytest.fixture(scope="class", autouse=True) def setup(self, project, my_base_table, my_materialized_view): run_dbt(["seed"]) @@ -72,5 +83,16 @@ def test_describe_materialized_view(self, project, my_materialized_view): assert results.schema_name == f'"{my_materialized_view.schema}"' assert results.database_name == f'"{my_materialized_view.database}"' assert results.cluster.fields == frozenset({"id"}) - assert results.auto_refresh.enable_refresh is True - assert results.auto_refresh.refresh_interval_minutes == 30 + assert 
results.options.enable_refresh is True + assert results.options.refresh_interval_minutes == 30 + + def test_describe_other_materialized_view(self, project, my_other_materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(my_other_materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.materialized_view_name == f'"{my_other_materialized_view.identifier}"' + assert results.schema_name == f'"{my_other_materialized_view.schema}"' + assert results.database_name == f'"{my_other_materialized_view.database}"' + assert results.cluster.fields == frozenset({"id"}) + assert results.options.enable_refresh is False + assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py index 444914bcd..826ec5d24 100644 --- a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py @@ -17,9 +17,9 @@ def check_start_state(project, materialized_view): with get_connection(project.adapter): results = project.adapter.describe_relation(materialized_view) assert isinstance(results, BigQueryMaterializedViewConfig) - assert results.auto_refresh.enable_refresh is True - assert results.auto_refresh.refresh_interval_minutes == 60 - assert results.auto_refresh.max_staleness == "0-0 0 0:45:0" + assert results.options.enable_refresh is True + assert results.options.refresh_interval_minutes == 60 + assert results.options.max_staleness == "0-0 0 0:45:0" # ~= "INTERVAL 45 MINUTE" assert results.cluster.fields == frozenset({"id", "value"}) @staticmethod @@ -33,9 +33,12 @@ def check_state_alter_change_is_applied(project, materialized_view): with get_connection(project.adapter): results = 
project.adapter.describe_relation(materialized_view) assert isinstance(results, BigQueryMaterializedViewConfig) - assert results.auto_refresh.enable_refresh is False - assert results.auto_refresh.refresh_interval_minutes is None - assert results.auto_refresh.max_staleness is None + # these change when run manually + assert results.options.enable_refresh is False + assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default + # this does not change when run manually + # in fact, it doesn't even show up in the DDL whereas the other two do + assert results.options.max_staleness is None @staticmethod def change_config_via_replace(project, materialized_view): From 9a23f2f00cd7f899034a2eeecc09408859533bfc Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Tue, 10 Oct 2023 17:32:26 -0400 Subject: [PATCH 45/53] committing to park changes and wrap up other 1.7 items --- dbt/adapters/bigquery/impl.py | 12 ++++++++++- .../relation_configs/materialized_view.py | 5 +---- .../bigquery/relation_configs/partition.py | 20 ++++++++++--------- .../partition/describe.sql | 4 ++-- .../relations/materialized_view/describe.sql | 7 +++++++ 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index cc0a6c5f3..5eafc2e2f 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -32,7 +32,7 @@ import google.auth import google.oauth2 import google.cloud.bigquery -from google.cloud.bigquery import AccessEntry, SchemaField +from google.cloud.bigquery import AccessEntry, SchemaField, Table import google.cloud.exceptions from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager @@ -555,6 +555,16 @@ def parse_partition_by(self, raw_partition_by: Any) -> Optional[PartitionConfig] def get_table_ref_from_relation(self, relation: BaseRelation): return self.connections.table_ref(relation.database, relation.schema, relation.identifier) + @available.parse(lambda *a, 
**k: True) + def get_table(self, relation: BigQueryRelation) -> Optional[Table]: + try: + table = self.connections.get_bq_table( + database=relation.database, schema=relation.schema, identifier=relation.identifier + ) + except google.cloud.exceptions.NotFound: + table = None + return table + def _update_column_dict(self, bq_column_dict, dbt_columns, parent=""): """ Helper function to recursively traverse the schema of a table in the diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index f3e3914f9..ef6a223d2 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -98,10 +98,7 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, # optional if partition_by := relation_results.get("partition_by"): - if len(partition_by) > 0: - config_dict.update( - {"partition": PartitionConfig.parse_relation_results(partition_by[0])} - ) + config_dict.update({"partition": PartitionConfig.parse_relation_results(partition_by)}) # type: ignore cluster_by: agate.Table = relation_results.get("cluster_by") # type: ignore if len(cluster_by) > 0: diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index 9c9714d7f..8dd193ad1 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -1,11 +1,11 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -import agate from dbt.contracts.graph.nodes import ModelNode from dbt.dataclass_schema import dbtClassMixin, ValidationError import dbt.exceptions from dbt.adapters.relation_configs import RelationConfigChange +from google.cloud.bigquery.table import Table @dataclass @@ -108,21 +108,23 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return 
model_node.config.extra.get("partition_by") @classmethod - def parse_relation_results(cls, describe_relation_results: agate.Row) -> Dict[str, Any]: + def parse_relation_results(cls, bq_table: Table) -> Dict[str, Any]: """ - Parse the results of a describe query into a raw config for `PartitionConfig.parse` + Parse the results of a BQ Table object into a raw config for `PartitionConfig.parse` """ + range_partitioning = bq_table.range_partitioning + time_partitioning = bq_table.time_partitioning config_dict = { - "field": describe_relation_results.get("partition_column_name"), - "data_type": describe_relation_results.get("partition_data_type"), - "granularity": describe_relation_results.get("partition_type"), + "field": time_partitioning.field, + "data_type": "", + "granularity": time_partitioning.type_, } # combine range fields into dictionary, like the model config range_dict = { - "start": describe_relation_results.get("partition_start"), - "end": describe_relation_results.get("partition_end"), - "interval": describe_relation_results.get("partition_interval"), + "start": range_partitioning.range_.start, + "end": range_partitioning.range_.end, + "interval": range_partitioning.range_.interval, } config_dict.update({"range": range_dict}) diff --git a/dbt/include/bigquery/macros/relation_components/partition/describe.sql b/dbt/include/bigquery/macros/relation_components/partition/describe.sql index e8e205801..1efdd56a1 100644 --- a/dbt/include/bigquery/macros/relation_components/partition/describe.sql +++ b/dbt/include/bigquery/macros/relation_components/partition/describe.sql @@ -37,6 +37,6 @@ {% macro bigquery__describe_partition(relation) %} - {% set _sql = bigquery__get_describe_partition_sql(relation) %} - {% do return(run_query(_sql)) %} + {% set bq_relation = adapter.connections.get_bq_table(relation.database, relation.schema, relation.identifier) %} + {% do return(bq_relation) %} {% endmacro %} diff --git 
a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql index 231443cf8..64bbdaee8 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql @@ -1,4 +1,10 @@ {% macro bigquery__describe_materialized_view(relation) %} + {% set bq_relation = adapter.get_table(relation) %} + {% do return(bq_relation) %} +{% endmacro %} + + +{% macro bigquery__describe_materialized_view_sql(relation) %} {%- set _materialized_view_sql -%} select table_name, @@ -16,6 +22,7 @@ {% do return({ 'materialized_view': _materialized_view, + 'partition_by': bigquery__describe_partition(relation), 'cluster_by': _cluster_by, 'options': _options }) %} From 9aafbb9261446da8bdc262b855c43164c2875299 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Tue, 10 Oct 2023 23:42:59 -0400 Subject: [PATCH 46/53] update describe to use the sdk instead of sql to pick up partition information --- dbt/adapters/bigquery/impl.py | 27 +++++---- dbt/adapters/bigquery/relation.py | 12 ++-- .../bigquery/relation_configs/_base.py | 14 ++--- .../bigquery/relation_configs/cluster.py | 6 +- .../relation_configs/materialized_view.py | 57 ++++++++----------- .../bigquery/relation_configs/options.py | 42 +++++++------- .../bigquery/relation_configs/partition.py | 40 +++++++------ .../relations/materialized_view/alter.sql | 2 +- .../relations/materialized_view/describe.sql | 29 ---------- .../adapter/describe_relation/_files.py | 46 ++++++++++++--- .../test_describe_relation.py | 34 +++++------ 11 files changed, 151 insertions(+), 158 deletions(-) delete mode 100644 dbt/include/bigquery/macros/relations/materialized_view/describe.sql diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 5eafc2e2f..5578a4a27 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -32,7 +32,7 @@ import google.auth 
import google.oauth2 import google.cloud.bigquery -from google.cloud.bigquery import AccessEntry, SchemaField, Table +from google.cloud.bigquery import AccessEntry, SchemaField, Table as BigQueryTable import google.cloud.exceptions from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager @@ -555,16 +555,6 @@ def parse_partition_by(self, raw_partition_by: Any) -> Optional[PartitionConfig] def get_table_ref_from_relation(self, relation: BaseRelation): return self.connections.table_ref(relation.database, relation.schema, relation.identifier) - @available.parse(lambda *a, **k: True) - def get_table(self, relation: BigQueryRelation) -> Optional[Table]: - try: - table = self.connections.get_bq_table( - database=relation.database, schema=relation.schema, identifier=relation.identifier - ) - except google.cloud.exceptions.NotFound: - table = None - return table - def _update_column_dict(self, bq_column_dict, dbt_columns, parent=""): """ Helper function to recursively traverse the schema of a table in the @@ -759,17 +749,26 @@ def get_view_options(self, config: Dict[str, Any], node: Dict[str, Any]) -> Dict opts = self.get_common_options(config, node) return opts + @available.parse(lambda *a, **k: True) + def get_bq_table(self, relation: BigQueryRelation) -> Optional[BigQueryTable]: + try: + table = self.connections.get_bq_table( + relation.database, relation.schema, relation.identifier + ) + except google.cloud.exceptions.NotFound: + table = None + return table + def describe_relation(self, relation: BigQueryRelation): if relation.type == RelationType.MaterializedView: - macro = "bigquery__describe_materialized_view" + bq_table = self.get_bq_table(relation) parser = BigQueryMaterializedViewConfig else: raise dbt.exceptions.DbtRuntimeError( f"The method `BigQueryAdapter.describe_relation` is not implemented " f"for the relation type: {relation.type}" ) - relation_results = self.execute_macro(macro, kwargs={"relation": relation}) - return 
parser.from_relation_results(relation_results) + return parser.from_bq_table(bq_table) @available.parse_none def grant_access_to(self, entity, entity_type, role, grant_target_dict): diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 184e133a1..65daa2808 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -1,10 +1,10 @@ from dataclasses import dataclass, field -from typing import FrozenSet, Optional +from typing import FrozenSet, Optional, TypeVar from itertools import chain, islice from dbt.context.providers import RuntimeConfigObject from dbt.adapters.base.relation import BaseRelation, ComponentName, InformationSchema -from dbt.adapters.relation_configs import RelationResults, RelationConfigChangeAction +from dbt.adapters.relation_configs import RelationConfigChangeAction from dbt.adapters.bigquery.relation_configs import ( BigQueryClusterConfigChange, BigQueryMaterializedViewConfig, @@ -16,7 +16,7 @@ from dbt.contracts.relation import RelationType from dbt.exceptions import CompilationError from dbt.utils import filter_null_values -from typing import TypeVar +from google.cloud.bigquery import Table as BigQueryTable Self = TypeVar("Self", bound="BigQueryRelation") @@ -74,12 +74,10 @@ def materialized_view_from_model_node( @classmethod def materialized_view_config_changeset( - cls, relation_results: RelationResults, runtime_config: RuntimeConfigObject + cls, table: BigQueryTable, runtime_config: RuntimeConfigObject ) -> Optional[BigQueryMaterializedViewConfigChangeset]: config_change_collection = BigQueryMaterializedViewConfigChangeset() - existing_materialized_view = BigQueryMaterializedViewConfig.from_relation_results( - relation_results - ) + existing_materialized_view = BigQueryMaterializedViewConfig.from_bq_table(table) new_materialized_view = cls.materialized_view_from_model_node(runtime_config.model) assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) assert 
isinstance(new_materialized_view, BigQueryMaterializedViewConfig) diff --git a/dbt/adapters/bigquery/relation_configs/_base.py b/dbt/adapters/bigquery/relation_configs/_base.py index 37f9423e9..92de1a854 100644 --- a/dbt/adapters/bigquery/relation_configs/_base.py +++ b/dbt/adapters/bigquery/relation_configs/_base.py @@ -3,7 +3,9 @@ import agate from dbt.adapters.base.relation import Policy -from dbt.adapters.relation_configs import RelationConfigBase, RelationResults +from dbt.adapters.relation_configs import RelationConfigBase +from google.cloud.bigquery import Table as BigQueryTable + from dbt.adapters.bigquery.relation_configs.policies import ( BigQueryIncludePolicy, BigQueryQuotePolicy, @@ -35,16 +37,14 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: ) @classmethod - def from_relation_results(cls, relation_results: RelationResults) -> "RelationConfigBase": - relation_config = cls.parse_relation_results(relation_results) + def from_bq_table(cls, table: BigQueryTable) -> "RelationConfigBase": + relation_config = cls.parse_bq_table(table) relation = cls.from_dict(relation_config) return relation @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> dict: - raise NotImplementedError( - "`parse_relation_results()` needs to be implemented on this RelationConfigBase instance" - ) + def parse_bq_table(cls, table: BigQueryTable) -> dict: + raise NotImplementedError("`parse_bq_table()` is not implemented for this relation type") @classmethod def _render_part(cls, component: ComponentName, value: Optional[str]) -> Optional[str]: diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/cluster.py index addf84db6..ad5b7b2ed 100644 --- a/dbt/adapters/bigquery/relation_configs/cluster.py +++ b/dbt/adapters/bigquery/relation_configs/cluster.py @@ -1,9 +1,9 @@ from dataclasses import dataclass from typing import Any, Dict, FrozenSet -import agate from dbt.adapters.relation_configs import 
RelationConfigChange from dbt.contracts.graph.nodes import ModelNode +from google.cloud.bigquery import Table as BigQueryTable from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase @@ -40,8 +40,8 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results: agate.Table) -> Dict[str, Any]: # type: ignore - config_dict = {"fields": frozenset(row.get("column_name") for row in relation_results)} + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: # type: ignore + config_dict = {"fields": frozenset(table.clustering_fields)} return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index ef6a223d2..51351aabf 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,10 +1,9 @@ from dataclasses import dataclass from typing import Any, Dict, Optional -import agate -from dbt.adapters.relation_configs import RelationResults from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import ComponentName +from google.cloud.bigquery import Table as BigQueryTable from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase from dbt.adapters.bigquery.relation_configs.options import ( @@ -25,17 +24,17 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_materialized_view_statement The following parameters are configurable by dbt: - - materialized_view_name: name of the materialized view - - schema_name: dataset name of the materialized view - - database_name: project name of the database + - table_id: name of the materialized view + - dataset_id: dataset name of the materialized view + - project_id: project name of the 
database - options: options that get set in `SET OPTIONS()` clause - partition: object containing partition information - cluster: object containing cluster information """ - materialized_view_name: str - schema_name: str - database_name: str + table_id: str + dataset_id: str + project_id: str options: BigQueryOptionsConfig partition: Optional[PartitionConfig] = None cluster: Optional[BigQueryClusterConfig] = None @@ -44,13 +43,9 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConfig": # required kwargs_dict: Dict[str, Any] = { - "materialized_view_name": cls._render_part( - ComponentName.Identifier, config_dict["materialized_view_name"] - ), - "schema_name": cls._render_part(ComponentName.Schema, config_dict["schema_name"]), - "database_name": cls._render_part( - ComponentName.Database, config_dict["database_name"] - ), + "table_id": cls._render_part(ComponentName.Identifier, config_dict["table_id"]), + "dataset_id": cls._render_part(ComponentName.Schema, config_dict["dataset_id"]), + "project_id": cls._render_part(ComponentName.Database, config_dict["project_id"]), "options": BigQueryOptionsConfig.from_dict(config_dict["options"]), } @@ -67,9 +62,9 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf @classmethod def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = { - "materialized_view_name": model_node.identifier, - "schema_name": model_node.schema, - "database_name": model_node.database, + "table_id": model_node.identifier, + "dataset_id": model_node.schema, + "project_id": model_node.database, # despite this being a foreign object, there will always be options because of defaults "options": BigQueryOptionsConfig.parse_model_node(model_node), } @@ -84,27 +79,21 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, 
relation_results: RelationResults) -> Dict[str, Any]: - materialized_view_config: agate.Table = relation_results.get("materialized_view") # type: ignore - materialized_view: agate.Row = cls._get_first_row(materialized_view_config) - + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: config_dict = { - "materialized_view_name": materialized_view.get("table_name"), - "schema_name": materialized_view.get("table_schema"), - "database_name": materialized_view.get("table_catalog"), + "table_id": table.table_id, + "dataset_id": table.dataset_id, + "project_id": table.project, # despite this being a foreign object, there will always be options because of defaults - "options": BigQueryOptionsConfig.parse_relation_results(relation_results), + "options": BigQueryOptionsConfig.parse_bq_table(table), } # optional - if partition_by := relation_results.get("partition_by"): - config_dict.update({"partition": PartitionConfig.parse_relation_results(partition_by)}) # type: ignore - - cluster_by: agate.Table = relation_results.get("cluster_by") # type: ignore - if len(cluster_by) > 0: - config_dict.update( - {"cluster": BigQueryClusterConfig.parse_relation_results(cluster_by)} - ) + if table.time_partitioning or table.range_partitioning: + config_dict.update({"partition": PartitionConfig.parse_bq_table(table)}) + + if table.clustering_fields: + config_dict.update({"cluster": BigQueryClusterConfig.parse_bq_table(table)}) return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/options.py b/dbt/adapters/bigquery/relation_configs/options.py index a84d7dd19..758740c50 100644 --- a/dbt/adapters/bigquery/relation_configs/options.py +++ b/dbt/adapters/bigquery/relation_configs/options.py @@ -1,10 +1,10 @@ from dataclasses import dataclass from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional -import agate -from dbt.adapters.relation_configs import RelationConfigChange, RelationResults +from 
dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode +from google.cloud.bigquery import Table as BigQueryTable from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase from dbt.adapters.bigquery.utility import bool_setting, float_setting, sql_escape @@ -25,18 +25,6 @@ class BigQueryOptionsConfig(BigQueryRelationConfigBase): description: Optional[str] = None labels: Optional[Dict[str, str]] = None - @classmethod - def user_configurable_options(cls) -> List[str]: - return [ - "enable_refresh", - "refresh_interval_minutes", - "expiration_timestamp", - "max_staleness", - "kms_key_name", - "description", - "labels", - ] - def as_ddl_dict(self) -> Dict[str, Any]: """ Reformat `options_dict` so that it can be passed into the `bigquery_options()` macro. @@ -121,7 +109,15 @@ def formatted_setting(name: str) -> Any: def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = { option: model_node.config.extra.get(option) - for option in cls.user_configurable_options() + for option in [ + "enable_refresh", + "refresh_interval_minutes", + "expiration_timestamp", + "max_staleness", + "kms_key_name", + "description", + "labels", + ] } # update dbt-specific versions of these settings @@ -135,13 +131,17 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - options_config: agate.Table = relation_results.get("options") # type: ignore + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: config_dict = { - option.get("option_name"): option.get("option_value") - for option in options_config - if option.get("option_name") in cls.user_configurable_options() + "enable_refresh": table.mview_enable_refresh, + "refresh_interval_minutes": table.mview_refresh_interval.seconds / 60, + "expiration_timestamp": table.expires, + "max_staleness": 
None, + "description": table.description, + "labels": table.labels, } + if encryption_configuration := table.encryption_configuration: + config_dict.update({"kms_key_name": encryption_configuration.kms_key_name}) return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index 8dd193ad1..cd57719db 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -5,7 +5,7 @@ from dbt.dataclass_schema import dbtClassMixin, ValidationError import dbt.exceptions from dbt.adapters.relation_configs import RelationConfigChange -from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import Table as BigQueryTable @dataclass @@ -108,25 +108,31 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return model_node.config.extra.get("partition_by") @classmethod - def parse_relation_results(cls, bq_table: Table) -> Dict[str, Any]: + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: """ Parse the results of a BQ Table object into a raw config for `PartitionConfig.parse` """ - range_partitioning = bq_table.range_partitioning - time_partitioning = bq_table.time_partitioning - config_dict = { - "field": time_partitioning.field, - "data_type": "", - "granularity": time_partitioning.type_, - } - - # combine range fields into dictionary, like the model config - range_dict = { - "start": range_partitioning.range_.start, - "end": range_partitioning.range_.end, - "interval": range_partitioning.range_.interval, - } - config_dict.update({"range": range_dict}) + if time_partitioning := table.time_partitioning: + field_types = {field.name: field.field_type.lower() for field in table.schema} + config_dict = { + "field": time_partitioning.field, + "data_type": field_types[time_partitioning.field], + "granularity": time_partitioning.type_, + } + + elif range_partitioning := table.range_partitioning: + 
config_dict = { + "field": range_partitioning.field, + "data_type": "int64", + "range": { + "start": range_partitioning.range_.start, + "end": range_partitioning.range_.end, + "interval": range_partitioning.range_.interval, + }, + } + + else: + config_dict = {} return config_dict diff --git a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql index b0381a7bf..7320addde 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql @@ -19,7 +19,7 @@ {% endmacro %} {% macro bigquery__get_materialized_view_configuration_changes(existing_relation, new_config) %} - {% set _existing_materialized_view = bigquery__describe_materialized_view(existing_relation) %} + {% set _existing_materialized_view = adapter.describe_relation(existing_relation) %} {% set _configuration_changes = existing_relation.materialized_view_config_changeset(_existing_materialized_view, new_config) %} {% do return(_configuration_changes) %} {% endmacro %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql deleted file mode 100644 index 64bbdaee8..000000000 --- a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql +++ /dev/null @@ -1,29 +0,0 @@ -{% macro bigquery__describe_materialized_view(relation) %} - {% set bq_relation = adapter.get_table(relation) %} - {% do return(bq_relation) %} -{% endmacro %} - - -{% macro bigquery__describe_materialized_view_sql(relation) %} - {%- set _materialized_view_sql -%} - select - table_name, - table_schema, - table_catalog - from {{ relation.information_schema('MATERIALIZED_VIEWS') }} - where table_name = '{{ relation.identifier }}' - and table_schema = '{{ relation.schema }}' - and table_catalog = '{{ relation.database }}' - {%- endset %} - {% set _materialized_view = 
run_query(_materialized_view_sql) %} - - {%- set _cluster_by = bigquery__describe_cluster(relation) -%} - {%- set _options = bigquery__describe_options(relation) -%} - - {% do return({ - 'materialized_view': _materialized_view, - 'partition_by': bigquery__describe_partition(relation), - 'cluster_by': _cluster_by, - 'options': _options - }) %} -{% endmacro %} diff --git a/tests/functional/adapter/describe_relation/_files.py b/tests/functional/adapter/describe_relation/_files.py index e5d330500..ac0203049 100644 --- a/tests/functional/adapter/describe_relation/_files.py +++ b/tests/functional/adapter/describe_relation/_files.py @@ -1,16 +1,17 @@ MY_SEED = """ id,value,record_date -1,100,2023-01-01 00:00:00 -2,200,2023-01-02 00:00:00 -3,300,2023-01-02 00:00:00 +1,100,2023-01-01 12:00:00 +2,200,2023-01-02 12:00:00 +3,300,2023-01-02 12:00:00 """.strip() + MY_BASE_TABLE = """ {{ config( materialized='table', partition_by={ "field": "record_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by=["id", "value"] @@ -22,12 +23,13 @@ from {{ ref('my_seed') }} """ + MY_MATERIALIZED_VIEW = """ {{ config( materialized='materialized_view', partition_by={ "field": "record_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by="id", @@ -40,13 +42,39 @@ """ +MY_OTHER_BASE_TABLE = """ +{{ config( + materialized='table', + partition_by={ + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } + }, + cluster_by=["id", "value"] +) }} +select + id, + value, + record_date +from {{ ref('my_seed') }} +""" + + MY_OTHER_MATERIALIZED_VIEW = """ {{ config( materialized='materialized_view', partition_by={ - "field": "record_date", - "data_type": "timestamp", - "granularity": "day" + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } }, cluster_by="id", enable_refresh=False, @@ -56,5 +84,5 @@ id, value, 
record_date -from {{ ref('my_base_table') }} +from {{ ref('my_other_base_table') }} """ diff --git a/tests/functional/adapter/describe_relation/test_describe_relation.py b/tests/functional/adapter/describe_relation/test_describe_relation.py index 4d6c77cca..adccd5126 100644 --- a/tests/functional/adapter/describe_relation/test_describe_relation.py +++ b/tests/functional/adapter/describe_relation/test_describe_relation.py @@ -5,25 +5,21 @@ from dbt.tests.util import get_connection, run_dbt from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig -from tests.functional.adapter.describe_relation._files import ( - MY_BASE_TABLE, - MY_MATERIALIZED_VIEW, - MY_OTHER_MATERIALIZED_VIEW, - MY_SEED, -) +from tests.functional.adapter.describe_relation import _files class TestDescribeRelation: @pytest.fixture(scope="class", autouse=True) def seeds(self): - return {"my_seed.csv": MY_SEED} + return {"my_seed.csv": _files.MY_SEED} @pytest.fixture(scope="class", autouse=True) def models(self): yield { - "my_base_table.sql": MY_BASE_TABLE, - "my_materialized_view.sql": MY_MATERIALIZED_VIEW, - "my_other_materialized_view.sql": MY_OTHER_MATERIALIZED_VIEW, + "my_base_table.sql": _files.MY_BASE_TABLE, + "my_materialized_view.sql": _files.MY_MATERIALIZED_VIEW, + "my_other_base_table.sql": _files.MY_OTHER_BASE_TABLE, + "my_other_materialized_view.sql": _files.MY_OTHER_MATERIALIZED_VIEW, } @pytest.fixture(scope="class") @@ -79,9 +75,12 @@ def test_describe_materialized_view(self, project, my_materialized_view): with get_connection(project.adapter): results = project.adapter.describe_relation(my_materialized_view) assert isinstance(results, BigQueryMaterializedViewConfig) - assert results.materialized_view_name == f'"{my_materialized_view.identifier}"' - assert results.schema_name == f'"{my_materialized_view.schema}"' - assert results.database_name == f'"{my_materialized_view.database}"' + assert results.table_id == f'"{my_materialized_view.identifier}"' + assert 
results.dataset_id == f'"{my_materialized_view.schema}"' + assert results.project_id == f'"{my_materialized_view.database}"' + assert results.partition.field == "record_date" + assert results.partition.data_type == "datetime" + assert results.partition.granularity == "day" assert results.cluster.fields == frozenset({"id"}) assert results.options.enable_refresh is True assert results.options.refresh_interval_minutes == 30 @@ -90,9 +89,12 @@ def test_describe_other_materialized_view(self, project, my_other_materialized_v with get_connection(project.adapter): results = project.adapter.describe_relation(my_other_materialized_view) assert isinstance(results, BigQueryMaterializedViewConfig) - assert results.materialized_view_name == f'"{my_other_materialized_view.identifier}"' - assert results.schema_name == f'"{my_other_materialized_view.schema}"' - assert results.database_name == f'"{my_other_materialized_view.database}"' + assert results.table_id == f'"{my_other_materialized_view.identifier}"' + assert results.dataset_id == f'"{my_other_materialized_view.schema}"' + assert results.project_id == f'"{my_other_materialized_view.database}"' + assert results.partition.field == "value" + assert results.partition.data_type == "int64" + assert results.partition.range == {"start": 0, "end": 500, "interval": 50} assert results.cluster.fields == frozenset({"id"}) assert results.options.enable_refresh is False assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default From a2e9fa3afdc2a859fa48d46261b3fdec3d03052b Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Oct 2023 00:16:57 -0400 Subject: [PATCH 47/53] basic tests pass --- dbt/adapters/bigquery/impl.py | 1 + dbt/adapters/bigquery/relation.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 5578a4a27..6adfaeee5 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -759,6 
+759,7 @@ def get_bq_table(self, relation: BigQueryRelation) -> Optional[BigQueryTable]: table = None return table + @available.parse(lambda *a, **k: True) def describe_relation(self, relation: BigQueryRelation): if relation.type == RelationType.MaterializedView: bq_table = self.get_bq_table(relation) diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 65daa2808..3076a2243 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -16,7 +16,6 @@ from dbt.contracts.relation import RelationType from dbt.exceptions import CompilationError from dbt.utils import filter_null_values -from google.cloud.bigquery import Table as BigQueryTable Self = TypeVar("Self", bound="BigQueryRelation") @@ -74,10 +73,11 @@ def materialized_view_from_model_node( @classmethod def materialized_view_config_changeset( - cls, table: BigQueryTable, runtime_config: RuntimeConfigObject + cls, + existing_materialized_view: BigQueryMaterializedViewConfig, + runtime_config: RuntimeConfigObject, ) -> Optional[BigQueryMaterializedViewConfigChangeset]: config_change_collection = BigQueryMaterializedViewConfigChangeset() - existing_materialized_view = BigQueryMaterializedViewConfig.from_bq_table(table) new_materialized_view = cls.materialized_view_from_model_node(runtime_config.model) assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) assert isinstance(new_materialized_view, BigQueryMaterializedViewConfig) From b2187bd23aa9c56c4ad773207d53e555ccb40021 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Oct 2023 00:39:11 -0400 Subject: [PATCH 48/53] existing change monitoring tests pass --- tests/functional/adapter/materialized_view_tests/_files.py | 6 ++++-- .../test_materialized_view_changes.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/functional/adapter/materialized_view_tests/_files.py b/tests/functional/adapter/materialized_view_tests/_files.py index 
6cc69d2c6..8580b6b3d 100644 --- a/tests/functional/adapter/materialized_view_tests/_files.py +++ b/tests/functional/adapter/materialized_view_tests/_files.py @@ -5,12 +5,13 @@ 3,300,2023-01-02 00:00:00 """.strip() + MY_BASE_TABLE = """ {{ config( materialized='table', partition_by={ "field": "record_valid_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by=["id", "value"] @@ -22,12 +23,13 @@ from {{ ref('my_seed') }} """ + MY_MATERIALIZED_VIEW = """ {{ config( materialized='materialized_view', partition_by={ "field": "record_valid_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by=["id", "value"], diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py index 826ec5d24..ca592613e 100644 --- a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py @@ -19,7 +19,9 @@ def check_start_state(project, materialized_view): assert isinstance(results, BigQueryMaterializedViewConfig) assert results.options.enable_refresh is True assert results.options.refresh_interval_minutes == 60 - assert results.options.max_staleness == "0-0 0 0:45:0" # ~= "INTERVAL 45 MINUTE" + assert results.partition.field == "record_valid_date" + assert results.partition.data_type == "datetime" + assert results.partition.granularity == "day" assert results.cluster.fields == frozenset({"id", "value"}) @staticmethod From 120c1cea38d0b25faf330cc91b6a57eb76c715a5 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Oct 2023 00:53:38 -0400 Subject: [PATCH 49/53] partition change monitoring tests pass --- .../adapter/materialized_view_tests/_files.py | 24 ++++++++++++++ .../adapter/materialized_view_tests/_mixin.py | 32 ++++++++++++------- .../test_materialized_view_changes.py | 
23 ++++++++++++- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/tests/functional/adapter/materialized_view_tests/_files.py b/tests/functional/adapter/materialized_view_tests/_files.py index 8580b6b3d..1ee64269d 100644 --- a/tests/functional/adapter/materialized_view_tests/_files.py +++ b/tests/functional/adapter/materialized_view_tests/_files.py @@ -24,6 +24,7 @@ """ +# the whitespace to the left on partition matters here MY_MATERIALIZED_VIEW = """ {{ config( materialized='materialized_view', @@ -43,3 +44,26 @@ record_valid_date from {{ ref('my_base_table') }} """ + + +# the whitespace to the left on partition matters here +MY_OTHER_BASE_TABLE = """ +{{ config( + materialized='table', + partition_by={ + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } + }, + cluster_by=["id", "value"] +) }} +select + id, + value, + record_valid_date +from {{ ref('my_seed') }} +""" diff --git a/tests/functional/adapter/materialized_view_tests/_mixin.py b/tests/functional/adapter/materialized_view_tests/_mixin.py index 5933a2dd2..5f75c7c04 100644 --- a/tests/functional/adapter/materialized_view_tests/_mixin.py +++ b/tests/functional/adapter/materialized_view_tests/_mixin.py @@ -12,11 +12,7 @@ set_model_file, ) -from tests.functional.adapter.materialized_view_tests._files import ( - MY_BASE_TABLE, - MY_MATERIALIZED_VIEW, - MY_SEED, -) +from tests.functional.adapter.materialized_view_tests import _files class BigQueryMaterializedViewMixin: @@ -35,11 +31,24 @@ def my_base_table(self, project) -> BaseRelation: type=RelationType.Table, ) + @pytest.fixture(scope="class") + def my_other_base_table(self, project) -> BaseRelation: + """ + Following the sentiment of `my_base_table` above, if we want to alter the partition + on the materialized view, we either need to update the partition on the base table, + or we need a second table with a different partition. 
+ """ + return project.adapter.Relation.create( + identifier="my_other_base_table", + schema=project.test_schema, + database=project.database, + type=RelationType.Table, + ) + @pytest.fixture(scope="function", autouse=True) - def setup(self, project, my_base_table, my_materialized_view): # type: ignore + def setup(self, project, my_base_table, my_other_base_table, my_materialized_view): # type: ignore run_dbt(["seed"]) - run_dbt(["run", "--models", my_base_table.identifier, "--full-refresh"]) - run_dbt(["run", "--models", my_materialized_view.identifier, "--full-refresh"]) + run_dbt(["run", "--full-refresh"]) # the tests touch these files, store their contents in memory initial_model = get_model_file(project, my_materialized_view) @@ -52,15 +61,16 @@ def setup(self, project, my_base_table, my_materialized_view): # type: ignore @pytest.fixture(scope="class", autouse=True) def seeds(self): - return {"my_seed.csv": MY_SEED} + return {"my_seed.csv": _files.MY_SEED} @pytest.fixture(scope="class", autouse=True) def models(self): yield { "my_table.sql": MY_TABLE, "my_view.sql": MY_VIEW, - "my_base_table.sql": MY_BASE_TABLE, - "my_materialized_view.sql": MY_MATERIALIZED_VIEW, + "my_base_table.sql": _files.MY_BASE_TABLE, + "my_other_base_table.sql": _files.MY_OTHER_BASE_TABLE, + "my_materialized_view.sql": _files.MY_MATERIALIZED_VIEW, } @staticmethod diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py index ca592613e..d2df9735e 100644 --- a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py @@ -45,7 +45,28 @@ def check_state_alter_change_is_applied(project, materialized_view): @staticmethod def change_config_via_replace(project, materialized_view): initial_model = get_model_file(project, materialized_view) - new_model = 
initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') + # the whitespace to the left on partition matters here + old_partition = """ + partition_by={ + "field": "record_valid_date", + "data_type": "datetime", + "granularity": "day" + },""" + new_partition = """ + partition_by={ + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } + },""" + new_model = ( + initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') + .replace(old_partition, new_partition) + .replace("'my_base_table'", "'my_other_base_table'") + ) set_model_file(project, materialized_view, new_model) @staticmethod From 2581b623de451b66c6d49a56b1ca7d77656d8e7e Mon Sep 17 00:00:00 2001 From: Mike Alfare <13974384+mikealfare@users.noreply.github.com> Date: Wed, 11 Oct 2023 00:55:39 -0400 Subject: [PATCH 50/53] ADAP-940: Add change monitoring for partitioning clause (#962) * committing to park changes and wrap up other 1.7 items * update describe to use the sdk instead of sql to pick up partition information * basic tests pass * existing change monitoring tests pass * partition change monitoring tests pass --------- Co-authored-by: colin-rogers-dbt <111200756+colin-rogers-dbt@users.noreply.github.com> --- dbt/adapters/bigquery/impl.py | 18 ++++-- dbt/adapters/bigquery/relation.py | 12 ++-- .../bigquery/relation_configs/_base.py | 14 ++--- .../bigquery/relation_configs/cluster.py | 6 +- .../relation_configs/materialized_view.py | 60 +++++++------------ .../bigquery/relation_configs/options.py | 42 ++++++------- .../bigquery/relation_configs/partition.py | 40 ++++++++----- .../partition/describe.sql | 4 +- .../relations/materialized_view/alter.sql | 2 +- .../relations/materialized_view/describe.sql | 22 ------- .../adapter/describe_relation/_files.py | 46 +++++++++++--- .../test_describe_relation.py | 34 ++++++----- .../adapter/materialized_view_tests/_files.py | 30 +++++++++- .../adapter/materialized_view_tests/_mixin.py | 32 
++++++---- .../test_materialized_view_changes.py | 27 ++++++++- 15 files changed, 229 insertions(+), 160 deletions(-) delete mode 100644 dbt/include/bigquery/macros/relations/materialized_view/describe.sql diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index cc0a6c5f3..6adfaeee5 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -32,7 +32,7 @@ import google.auth import google.oauth2 import google.cloud.bigquery -from google.cloud.bigquery import AccessEntry, SchemaField +from google.cloud.bigquery import AccessEntry, SchemaField, Table as BigQueryTable import google.cloud.exceptions from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager @@ -749,17 +749,27 @@ def get_view_options(self, config: Dict[str, Any], node: Dict[str, Any]) -> Dict opts = self.get_common_options(config, node) return opts + @available.parse(lambda *a, **k: True) + def get_bq_table(self, relation: BigQueryRelation) -> Optional[BigQueryTable]: + try: + table = self.connections.get_bq_table( + relation.database, relation.schema, relation.identifier + ) + except google.cloud.exceptions.NotFound: + table = None + return table + + @available.parse(lambda *a, **k: True) def describe_relation(self, relation: BigQueryRelation): if relation.type == RelationType.MaterializedView: - macro = "bigquery__describe_materialized_view" + bq_table = self.get_bq_table(relation) parser = BigQueryMaterializedViewConfig else: raise dbt.exceptions.DbtRuntimeError( f"The method `BigQueryAdapter.describe_relation` is not implemented " f"for the relation type: {relation.type}" ) - relation_results = self.execute_macro(macro, kwargs={"relation": relation}) - return parser.from_relation_results(relation_results) + return parser.from_bq_table(bq_table) @available.parse_none def grant_access_to(self, entity, entity_type, role, grant_target_dict): diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 
184e133a1..3076a2243 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -1,10 +1,10 @@ from dataclasses import dataclass, field -from typing import FrozenSet, Optional +from typing import FrozenSet, Optional, TypeVar from itertools import chain, islice from dbt.context.providers import RuntimeConfigObject from dbt.adapters.base.relation import BaseRelation, ComponentName, InformationSchema -from dbt.adapters.relation_configs import RelationResults, RelationConfigChangeAction +from dbt.adapters.relation_configs import RelationConfigChangeAction from dbt.adapters.bigquery.relation_configs import ( BigQueryClusterConfigChange, BigQueryMaterializedViewConfig, @@ -16,7 +16,6 @@ from dbt.contracts.relation import RelationType from dbt.exceptions import CompilationError from dbt.utils import filter_null_values -from typing import TypeVar Self = TypeVar("Self", bound="BigQueryRelation") @@ -74,12 +73,11 @@ def materialized_view_from_model_node( @classmethod def materialized_view_config_changeset( - cls, relation_results: RelationResults, runtime_config: RuntimeConfigObject + cls, + existing_materialized_view: BigQueryMaterializedViewConfig, + runtime_config: RuntimeConfigObject, ) -> Optional[BigQueryMaterializedViewConfigChangeset]: config_change_collection = BigQueryMaterializedViewConfigChangeset() - existing_materialized_view = BigQueryMaterializedViewConfig.from_relation_results( - relation_results - ) new_materialized_view = cls.materialized_view_from_model_node(runtime_config.model) assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) assert isinstance(new_materialized_view, BigQueryMaterializedViewConfig) diff --git a/dbt/adapters/bigquery/relation_configs/_base.py b/dbt/adapters/bigquery/relation_configs/_base.py index 37f9423e9..92de1a854 100644 --- a/dbt/adapters/bigquery/relation_configs/_base.py +++ b/dbt/adapters/bigquery/relation_configs/_base.py @@ -3,7 +3,9 @@ import agate from 
dbt.adapters.base.relation import Policy -from dbt.adapters.relation_configs import RelationConfigBase, RelationResults +from dbt.adapters.relation_configs import RelationConfigBase +from google.cloud.bigquery import Table as BigQueryTable + from dbt.adapters.bigquery.relation_configs.policies import ( BigQueryIncludePolicy, BigQueryQuotePolicy, @@ -35,16 +37,14 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: ) @classmethod - def from_relation_results(cls, relation_results: RelationResults) -> "RelationConfigBase": - relation_config = cls.parse_relation_results(relation_results) + def from_bq_table(cls, table: BigQueryTable) -> "RelationConfigBase": + relation_config = cls.parse_bq_table(table) relation = cls.from_dict(relation_config) return relation @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> dict: - raise NotImplementedError( - "`parse_relation_results()` needs to be implemented on this RelationConfigBase instance" - ) + def parse_bq_table(cls, table: BigQueryTable) -> dict: + raise NotImplementedError("`parse_bq_table()` is not implemented for this relation type") @classmethod def _render_part(cls, component: ComponentName, value: Optional[str]) -> Optional[str]: diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/cluster.py index addf84db6..ad5b7b2ed 100644 --- a/dbt/adapters/bigquery/relation_configs/cluster.py +++ b/dbt/adapters/bigquery/relation_configs/cluster.py @@ -1,9 +1,9 @@ from dataclasses import dataclass from typing import Any, Dict, FrozenSet -import agate from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode +from google.cloud.bigquery import Table as BigQueryTable from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase @@ -40,8 +40,8 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def 
parse_relation_results(cls, relation_results: agate.Table) -> Dict[str, Any]: # type: ignore - config_dict = {"fields": frozenset(row.get("column_name") for row in relation_results)} + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: # type: ignore + config_dict = {"fields": frozenset(table.clustering_fields)} return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/materialized_view.py index f3e3914f9..51351aabf 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/materialized_view.py @@ -1,10 +1,9 @@ from dataclasses import dataclass from typing import Any, Dict, Optional -import agate -from dbt.adapters.relation_configs import RelationResults from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import ComponentName +from google.cloud.bigquery import Table as BigQueryTable from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase from dbt.adapters.bigquery.relation_configs.options import ( @@ -25,17 +24,17 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_materialized_view_statement The following parameters are configurable by dbt: - - materialized_view_name: name of the materialized view - - schema_name: dataset name of the materialized view - - database_name: project name of the database + - table_id: name of the materialized view + - dataset_id: dataset name of the materialized view + - project_id: project name of the database - options: options that get set in `SET OPTIONS()` clause - partition: object containing partition information - cluster: object containing cluster information """ - materialized_view_name: str - schema_name: str - database_name: str + table_id: str + dataset_id: str + project_id: str options: BigQueryOptionsConfig 
partition: Optional[PartitionConfig] = None cluster: Optional[BigQueryClusterConfig] = None @@ -44,13 +43,9 @@ class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConfig": # required kwargs_dict: Dict[str, Any] = { - "materialized_view_name": cls._render_part( - ComponentName.Identifier, config_dict["materialized_view_name"] - ), - "schema_name": cls._render_part(ComponentName.Schema, config_dict["schema_name"]), - "database_name": cls._render_part( - ComponentName.Database, config_dict["database_name"] - ), + "table_id": cls._render_part(ComponentName.Identifier, config_dict["table_id"]), + "dataset_id": cls._render_part(ComponentName.Schema, config_dict["dataset_id"]), + "project_id": cls._render_part(ComponentName.Database, config_dict["project_id"]), "options": BigQueryOptionsConfig.from_dict(config_dict["options"]), } @@ -67,9 +62,9 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "BigQueryMaterializedViewConf @classmethod def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = { - "materialized_view_name": model_node.identifier, - "schema_name": model_node.schema, - "database_name": model_node.database, + "table_id": model_node.identifier, + "dataset_id": model_node.schema, + "project_id": model_node.database, # despite this being a foreign object, there will always be options because of defaults "options": BigQueryOptionsConfig.parse_model_node(model_node), } @@ -84,30 +79,21 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - materialized_view_config: agate.Table = relation_results.get("materialized_view") # type: ignore - materialized_view: agate.Row = cls._get_first_row(materialized_view_config) - + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: config_dict = { - 
"materialized_view_name": materialized_view.get("table_name"), - "schema_name": materialized_view.get("table_schema"), - "database_name": materialized_view.get("table_catalog"), + "table_id": table.table_id, + "dataset_id": table.dataset_id, + "project_id": table.project, # despite this being a foreign object, there will always be options because of defaults - "options": BigQueryOptionsConfig.parse_relation_results(relation_results), + "options": BigQueryOptionsConfig.parse_bq_table(table), } # optional - if partition_by := relation_results.get("partition_by"): - if len(partition_by) > 0: - config_dict.update( - {"partition": PartitionConfig.parse_relation_results(partition_by[0])} - ) - - cluster_by: agate.Table = relation_results.get("cluster_by") # type: ignore - if len(cluster_by) > 0: - config_dict.update( - {"cluster": BigQueryClusterConfig.parse_relation_results(cluster_by)} - ) + if table.time_partitioning or table.range_partitioning: + config_dict.update({"partition": PartitionConfig.parse_bq_table(table)}) + + if table.clustering_fields: + config_dict.update({"cluster": BigQueryClusterConfig.parse_bq_table(table)}) return config_dict diff --git a/dbt/adapters/bigquery/relation_configs/options.py b/dbt/adapters/bigquery/relation_configs/options.py index a84d7dd19..758740c50 100644 --- a/dbt/adapters/bigquery/relation_configs/options.py +++ b/dbt/adapters/bigquery/relation_configs/options.py @@ -1,10 +1,10 @@ from dataclasses import dataclass from datetime import datetime, timedelta -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional -import agate -from dbt.adapters.relation_configs import RelationConfigChange, RelationResults +from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode +from google.cloud.bigquery import Table as BigQueryTable from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase from dbt.adapters.bigquery.utility import 
bool_setting, float_setting, sql_escape @@ -25,18 +25,6 @@ class BigQueryOptionsConfig(BigQueryRelationConfigBase): description: Optional[str] = None labels: Optional[Dict[str, str]] = None - @classmethod - def user_configurable_options(cls) -> List[str]: - return [ - "enable_refresh", - "refresh_interval_minutes", - "expiration_timestamp", - "max_staleness", - "kms_key_name", - "description", - "labels", - ] - def as_ddl_dict(self) -> Dict[str, Any]: """ Reformat `options_dict` so that it can be passed into the `bigquery_options()` macro. @@ -121,7 +109,15 @@ def formatted_setting(name: str) -> Any: def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: config_dict = { option: model_node.config.extra.get(option) - for option in cls.user_configurable_options() + for option in [ + "enable_refresh", + "refresh_interval_minutes", + "expiration_timestamp", + "max_staleness", + "kms_key_name", + "description", + "labels", + ] } # update dbt-specific versions of these settings @@ -135,13 +131,17 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return config_dict @classmethod - def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, Any]: - options_config: agate.Table = relation_results.get("options") # type: ignore + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: config_dict = { - option.get("option_name"): option.get("option_value") - for option in options_config - if option.get("option_name") in cls.user_configurable_options() + "enable_refresh": table.mview_enable_refresh, + "refresh_interval_minutes": table.mview_refresh_interval.seconds / 60, + "expiration_timestamp": table.expires, + "max_staleness": None, + "description": table.description, + "labels": table.labels, } + if encryption_configuration := table.encryption_configuration: + config_dict.update({"kms_key_name": encryption_configuration.kms_key_name}) return config_dict diff --git 
a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/partition.py index 9c9714d7f..cd57719db 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/partition.py @@ -1,11 +1,11 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional -import agate from dbt.contracts.graph.nodes import ModelNode from dbt.dataclass_schema import dbtClassMixin, ValidationError import dbt.exceptions from dbt.adapters.relation_configs import RelationConfigChange +from google.cloud.bigquery.table import Table as BigQueryTable @dataclass @@ -108,23 +108,31 @@ def parse_model_node(cls, model_node: ModelNode) -> Dict[str, Any]: return model_node.config.extra.get("partition_by") @classmethod - def parse_relation_results(cls, describe_relation_results: agate.Row) -> Dict[str, Any]: + def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: """ - Parse the results of a describe query into a raw config for `PartitionConfig.parse` + Parse the results of a BQ Table object into a raw config for `PartitionConfig.parse` """ - config_dict = { - "field": describe_relation_results.get("partition_column_name"), - "data_type": describe_relation_results.get("partition_data_type"), - "granularity": describe_relation_results.get("partition_type"), - } - - # combine range fields into dictionary, like the model config - range_dict = { - "start": describe_relation_results.get("partition_start"), - "end": describe_relation_results.get("partition_end"), - "interval": describe_relation_results.get("partition_interval"), - } - config_dict.update({"range": range_dict}) + if time_partitioning := table.time_partitioning: + field_types = {field.name: field.field_type.lower() for field in table.schema} + config_dict = { + "field": time_partitioning.field, + "data_type": field_types[time_partitioning.field], + "granularity": time_partitioning.type_, + } + + elif range_partitioning := 
table.range_partitioning: + config_dict = { + "field": range_partitioning.field, + "data_type": "int64", + "range": { + "start": range_partitioning.range_.start, + "end": range_partitioning.range_.end, + "interval": range_partitioning.range_.interval, + }, + } + + else: + config_dict = {} return config_dict diff --git a/dbt/include/bigquery/macros/relation_components/partition/describe.sql b/dbt/include/bigquery/macros/relation_components/partition/describe.sql index e8e205801..1efdd56a1 100644 --- a/dbt/include/bigquery/macros/relation_components/partition/describe.sql +++ b/dbt/include/bigquery/macros/relation_components/partition/describe.sql @@ -37,6 +37,6 @@ {% macro bigquery__describe_partition(relation) %} - {% set _sql = bigquery__get_describe_partition_sql(relation) %} - {% do return(run_query(_sql)) %} + {% set bq_relation = adapter.connections.get_bq_table(relation.database, relation.schema, relation.identifier) %} + {% do return(bq_relation) %} {% endmacro %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql index b0381a7bf..7320addde 100644 --- a/dbt/include/bigquery/macros/relations/materialized_view/alter.sql +++ b/dbt/include/bigquery/macros/relations/materialized_view/alter.sql @@ -19,7 +19,7 @@ {% endmacro %} {% macro bigquery__get_materialized_view_configuration_changes(existing_relation, new_config) %} - {% set _existing_materialized_view = bigquery__describe_materialized_view(existing_relation) %} + {% set _existing_materialized_view = adapter.describe_relation(existing_relation) %} {% set _configuration_changes = existing_relation.materialized_view_config_changeset(_existing_materialized_view, new_config) %} {% do return(_configuration_changes) %} {% endmacro %} diff --git a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql b/dbt/include/bigquery/macros/relations/materialized_view/describe.sql deleted file mode 100644 index 
231443cf8..000000000 --- a/dbt/include/bigquery/macros/relations/materialized_view/describe.sql +++ /dev/null @@ -1,22 +0,0 @@ -{% macro bigquery__describe_materialized_view(relation) %} - {%- set _materialized_view_sql -%} - select - table_name, - table_schema, - table_catalog - from {{ relation.information_schema('MATERIALIZED_VIEWS') }} - where table_name = '{{ relation.identifier }}' - and table_schema = '{{ relation.schema }}' - and table_catalog = '{{ relation.database }}' - {%- endset %} - {% set _materialized_view = run_query(_materialized_view_sql) %} - - {%- set _cluster_by = bigquery__describe_cluster(relation) -%} - {%- set _options = bigquery__describe_options(relation) -%} - - {% do return({ - 'materialized_view': _materialized_view, - 'cluster_by': _cluster_by, - 'options': _options - }) %} -{% endmacro %} diff --git a/tests/functional/adapter/describe_relation/_files.py b/tests/functional/adapter/describe_relation/_files.py index e5d330500..ac0203049 100644 --- a/tests/functional/adapter/describe_relation/_files.py +++ b/tests/functional/adapter/describe_relation/_files.py @@ -1,16 +1,17 @@ MY_SEED = """ id,value,record_date -1,100,2023-01-01 00:00:00 -2,200,2023-01-02 00:00:00 -3,300,2023-01-02 00:00:00 +1,100,2023-01-01 12:00:00 +2,200,2023-01-02 12:00:00 +3,300,2023-01-02 12:00:00 """.strip() + MY_BASE_TABLE = """ {{ config( materialized='table', partition_by={ "field": "record_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by=["id", "value"] @@ -22,12 +23,13 @@ from {{ ref('my_seed') }} """ + MY_MATERIALIZED_VIEW = """ {{ config( materialized='materialized_view', partition_by={ "field": "record_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by="id", @@ -40,13 +42,39 @@ """ +MY_OTHER_BASE_TABLE = """ +{{ config( + materialized='table', + partition_by={ + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + 
} + }, + cluster_by=["id", "value"] +) }} +select + id, + value, + record_date +from {{ ref('my_seed') }} +""" + + MY_OTHER_MATERIALIZED_VIEW = """ {{ config( materialized='materialized_view', partition_by={ - "field": "record_date", - "data_type": "timestamp", - "granularity": "day" + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } }, cluster_by="id", enable_refresh=False, @@ -56,5 +84,5 @@ id, value, record_date -from {{ ref('my_base_table') }} +from {{ ref('my_other_base_table') }} """ diff --git a/tests/functional/adapter/describe_relation/test_describe_relation.py b/tests/functional/adapter/describe_relation/test_describe_relation.py index 4d6c77cca..adccd5126 100644 --- a/tests/functional/adapter/describe_relation/test_describe_relation.py +++ b/tests/functional/adapter/describe_relation/test_describe_relation.py @@ -5,25 +5,21 @@ from dbt.tests.util import get_connection, run_dbt from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig -from tests.functional.adapter.describe_relation._files import ( - MY_BASE_TABLE, - MY_MATERIALIZED_VIEW, - MY_OTHER_MATERIALIZED_VIEW, - MY_SEED, -) +from tests.functional.adapter.describe_relation import _files class TestDescribeRelation: @pytest.fixture(scope="class", autouse=True) def seeds(self): - return {"my_seed.csv": MY_SEED} + return {"my_seed.csv": _files.MY_SEED} @pytest.fixture(scope="class", autouse=True) def models(self): yield { - "my_base_table.sql": MY_BASE_TABLE, - "my_materialized_view.sql": MY_MATERIALIZED_VIEW, - "my_other_materialized_view.sql": MY_OTHER_MATERIALIZED_VIEW, + "my_base_table.sql": _files.MY_BASE_TABLE, + "my_materialized_view.sql": _files.MY_MATERIALIZED_VIEW, + "my_other_base_table.sql": _files.MY_OTHER_BASE_TABLE, + "my_other_materialized_view.sql": _files.MY_OTHER_MATERIALIZED_VIEW, } @pytest.fixture(scope="class") @@ -79,9 +75,12 @@ def test_describe_materialized_view(self, project, my_materialized_view): 
with get_connection(project.adapter): results = project.adapter.describe_relation(my_materialized_view) assert isinstance(results, BigQueryMaterializedViewConfig) - assert results.materialized_view_name == f'"{my_materialized_view.identifier}"' - assert results.schema_name == f'"{my_materialized_view.schema}"' - assert results.database_name == f'"{my_materialized_view.database}"' + assert results.table_id == f'"{my_materialized_view.identifier}"' + assert results.dataset_id == f'"{my_materialized_view.schema}"' + assert results.project_id == f'"{my_materialized_view.database}"' + assert results.partition.field == "record_date" + assert results.partition.data_type == "datetime" + assert results.partition.granularity == "day" assert results.cluster.fields == frozenset({"id"}) assert results.options.enable_refresh is True assert results.options.refresh_interval_minutes == 30 @@ -90,9 +89,12 @@ def test_describe_other_materialized_view(self, project, my_other_materialized_v with get_connection(project.adapter): results = project.adapter.describe_relation(my_other_materialized_view) assert isinstance(results, BigQueryMaterializedViewConfig) - assert results.materialized_view_name == f'"{my_other_materialized_view.identifier}"' - assert results.schema_name == f'"{my_other_materialized_view.schema}"' - assert results.database_name == f'"{my_other_materialized_view.database}"' + assert results.table_id == f'"{my_other_materialized_view.identifier}"' + assert results.dataset_id == f'"{my_other_materialized_view.schema}"' + assert results.project_id == f'"{my_other_materialized_view.database}"' + assert results.partition.field == "value" + assert results.partition.data_type == "int64" + assert results.partition.range == {"start": 0, "end": 500, "interval": 50} assert results.cluster.fields == frozenset({"id"}) assert results.options.enable_refresh is False assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default diff --git 
a/tests/functional/adapter/materialized_view_tests/_files.py b/tests/functional/adapter/materialized_view_tests/_files.py index 6cc69d2c6..1ee64269d 100644 --- a/tests/functional/adapter/materialized_view_tests/_files.py +++ b/tests/functional/adapter/materialized_view_tests/_files.py @@ -5,12 +5,13 @@ 3,300,2023-01-02 00:00:00 """.strip() + MY_BASE_TABLE = """ {{ config( materialized='table', partition_by={ "field": "record_valid_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by=["id", "value"] @@ -22,12 +23,14 @@ from {{ ref('my_seed') }} """ + +# the whitespace to the left on partition matters here MY_MATERIALIZED_VIEW = """ {{ config( materialized='materialized_view', partition_by={ "field": "record_valid_date", - "data_type": "timestamp", + "data_type": "datetime", "granularity": "day" }, cluster_by=["id", "value"], @@ -41,3 +44,26 @@ record_valid_date from {{ ref('my_base_table') }} """ + + +# the whitespace to the left on partition matters here +MY_OTHER_BASE_TABLE = """ +{{ config( + materialized='table', + partition_by={ + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } + }, + cluster_by=["id", "value"] +) }} +select + id, + value, + record_valid_date +from {{ ref('my_seed') }} +""" diff --git a/tests/functional/adapter/materialized_view_tests/_mixin.py b/tests/functional/adapter/materialized_view_tests/_mixin.py index 5933a2dd2..5f75c7c04 100644 --- a/tests/functional/adapter/materialized_view_tests/_mixin.py +++ b/tests/functional/adapter/materialized_view_tests/_mixin.py @@ -12,11 +12,7 @@ set_model_file, ) -from tests.functional.adapter.materialized_view_tests._files import ( - MY_BASE_TABLE, - MY_MATERIALIZED_VIEW, - MY_SEED, -) +from tests.functional.adapter.materialized_view_tests import _files class BigQueryMaterializedViewMixin: @@ -35,11 +31,24 @@ def my_base_table(self, project) -> BaseRelation: type=RelationType.Table, ) + 
@pytest.fixture(scope="class") + def my_other_base_table(self, project) -> BaseRelation: + """ + Following the sentiment of `my_base_table` above, if we want to alter the partition + on the materialized view, we either need to update the partition on the base table, + or we need a second table with a different partition. + """ + return project.adapter.Relation.create( + identifier="my_other_base_table", + schema=project.test_schema, + database=project.database, + type=RelationType.Table, + ) + @pytest.fixture(scope="function", autouse=True) - def setup(self, project, my_base_table, my_materialized_view): # type: ignore + def setup(self, project, my_base_table, my_other_base_table, my_materialized_view): # type: ignore run_dbt(["seed"]) - run_dbt(["run", "--models", my_base_table.identifier, "--full-refresh"]) - run_dbt(["run", "--models", my_materialized_view.identifier, "--full-refresh"]) + run_dbt(["run", "--full-refresh"]) # the tests touch these files, store their contents in memory initial_model = get_model_file(project, my_materialized_view) @@ -52,15 +61,16 @@ def setup(self, project, my_base_table, my_materialized_view): # type: ignore @pytest.fixture(scope="class", autouse=True) def seeds(self): - return {"my_seed.csv": MY_SEED} + return {"my_seed.csv": _files.MY_SEED} @pytest.fixture(scope="class", autouse=True) def models(self): yield { "my_table.sql": MY_TABLE, "my_view.sql": MY_VIEW, - "my_base_table.sql": MY_BASE_TABLE, - "my_materialized_view.sql": MY_MATERIALIZED_VIEW, + "my_base_table.sql": _files.MY_BASE_TABLE, + "my_other_base_table.sql": _files.MY_OTHER_BASE_TABLE, + "my_materialized_view.sql": _files.MY_MATERIALIZED_VIEW, } @staticmethod diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py index 826ec5d24..d2df9735e 100644 --- a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py +++ 
b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py @@ -19,7 +19,9 @@ def check_start_state(project, materialized_view): assert isinstance(results, BigQueryMaterializedViewConfig) assert results.options.enable_refresh is True assert results.options.refresh_interval_minutes == 60 - assert results.options.max_staleness == "0-0 0 0:45:0" # ~= "INTERVAL 45 MINUTE" + assert results.partition.field == "record_valid_date" + assert results.partition.data_type == "datetime" + assert results.partition.granularity == "day" assert results.cluster.fields == frozenset({"id", "value"}) @staticmethod @@ -43,7 +45,28 @@ def check_state_alter_change_is_applied(project, materialized_view): @staticmethod def change_config_via_replace(project, materialized_view): initial_model = get_model_file(project, materialized_view) - new_model = initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') + # the whitespace to the left on partition matters here + old_partition = """ + partition_by={ + "field": "record_valid_date", + "data_type": "datetime", + "granularity": "day" + },""" + new_partition = """ + partition_by={ + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } + },""" + new_model = ( + initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') + .replace(old_partition, new_partition) + .replace("'my_base_table'", "'my_other_base_table'") + ) set_model_file(project, materialized_view, new_model) @staticmethod From f3b1e71a015d1d7d91c95c44792695c496f8db40 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Oct 2023 15:47:58 -0400 Subject: [PATCH 51/53] implement PR review feedback --- dbt/adapters/bigquery/impl.py | 9 +++- dbt/adapters/bigquery/relation.py | 20 ++++----- .../bigquery/relation_configs/__init__.py | 11 ++--- .../bigquery/relation_configs/_base.py | 12 +++--- .../{cluster.py => _cluster.py} | 8 ++-- ...rialized_view.py => _materialized_view.py} | 16 
++++--- .../{options.py => _options.py} | 4 +- .../{partition.py => _partition.py} | 37 ++++++++-------- .../{policies.py => _policies.py} | 0 .../relation_components/cluster/describe.sql | 15 ------- .../relation_components/options/describe.sql | 15 ------- .../partition/describe.sql | 42 ------------------- 12 files changed, 62 insertions(+), 127 deletions(-) rename dbt/adapters/bigquery/relation_configs/{cluster.py => _cluster.py} (91%) rename dbt/adapters/bigquery/relation_configs/{materialized_view.py => _materialized_view.py} (89%) rename dbt/adapters/bigquery/relation_configs/{options.py => _options.py} (98%) rename dbt/adapters/bigquery/relation_configs/{partition.py => _partition.py} (85%) rename dbt/adapters/bigquery/relation_configs/{policies.py => _policies.py} (100%) delete mode 100644 dbt/include/bigquery/macros/relation_components/cluster/describe.sql delete mode 100644 dbt/include/bigquery/macros/relation_components/options/describe.sql delete mode 100644 dbt/include/bigquery/macros/relation_components/partition/describe.sql diff --git a/dbt/adapters/bigquery/impl.py b/dbt/adapters/bigquery/impl.py index 6adfaeee5..50ce21f11 100644 --- a/dbt/adapters/bigquery/impl.py +++ b/dbt/adapters/bigquery/impl.py @@ -45,6 +45,7 @@ ) from dbt.adapters.bigquery.relation import BigQueryRelation from dbt.adapters.bigquery.relation_configs import ( + BigQueryBaseRelationConfig, BigQueryMaterializedViewConfig, PartitionConfig, ) @@ -760,7 +761,9 @@ def get_bq_table(self, relation: BigQueryRelation) -> Optional[BigQueryTable]: return table @available.parse(lambda *a, **k: True) - def describe_relation(self, relation: BigQueryRelation): + def describe_relation( + self, relation: BigQueryRelation + ) -> Optional[BigQueryBaseRelationConfig]: if relation.type == RelationType.MaterializedView: bq_table = self.get_bq_table(relation) parser = BigQueryMaterializedViewConfig @@ -769,7 +772,9 @@ def describe_relation(self, relation: BigQueryRelation): f"The method 
`BigQueryAdapter.describe_relation` is not implemented " f"for the relation type: {relation.type}" ) - return parser.from_bq_table(bq_table) + if bq_table: + return parser.from_bq_table(bq_table) + return None @available.parse_none def grant_access_to(self, entity, entity_type, role, grant_target_dict): diff --git a/dbt/adapters/bigquery/relation.py b/dbt/adapters/bigquery/relation.py index 3076a2243..a689e76fc 100644 --- a/dbt/adapters/bigquery/relation.py +++ b/dbt/adapters/bigquery/relation.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import FrozenSet, Optional, TypeVar from itertools import chain, islice @@ -10,7 +10,7 @@ BigQueryMaterializedViewConfig, BigQueryMaterializedViewConfigChangeset, BigQueryOptionsConfigChange, - BigQueryQuotePolicy, + BigQueryPartitionConfigChange, ) from dbt.contracts.graph.nodes import ModelNode from dbt.contracts.relation import RelationType @@ -25,9 +25,6 @@ class BigQueryRelation(BaseRelation): quote_character: str = "`" location: Optional[str] = None - # this is causing unit tests to fail - # include_policy: BigQueryIncludePolicy = field(default_factory=lambda: BigQueryIncludePolicy()) - quote_policy: BigQueryQuotePolicy = field(default_factory=lambda: BigQueryQuotePolicy()) renameable_relations: FrozenSet[RelationType] = frozenset({RelationType.Table}) replaceable_relations: FrozenSet[RelationType] = frozenset( {RelationType.Table, RelationType.View} @@ -79,8 +76,6 @@ def materialized_view_config_changeset( ) -> Optional[BigQueryMaterializedViewConfigChangeset]: config_change_collection = BigQueryMaterializedViewConfigChangeset() new_materialized_view = cls.materialized_view_from_model_node(runtime_config.model) - assert isinstance(existing_materialized_view, BigQueryMaterializedViewConfig) - assert isinstance(new_materialized_view, BigQueryMaterializedViewConfig) if new_materialized_view.options != existing_materialized_view.options: 
config_change_collection.options = BigQueryOptionsConfigChange( @@ -88,10 +83,13 @@ def materialized_view_config_changeset( context=new_materialized_view.options, ) - if ( - new_materialized_view.cluster != existing_materialized_view.cluster - and new_materialized_view.cluster - ): + if new_materialized_view.partition != existing_materialized_view.partition: + config_change_collection.partition = BigQueryPartitionConfigChange( + action=RelationConfigChangeAction.alter, + context=new_materialized_view.partition, + ) + + if new_materialized_view.cluster != existing_materialized_view.cluster: config_change_collection.cluster = BigQueryClusterConfigChange( action=RelationConfigChangeAction.alter, context=new_materialized_view.cluster, diff --git a/dbt/adapters/bigquery/relation_configs/__init__.py b/dbt/adapters/bigquery/relation_configs/__init__.py index af1a0a9b0..9ccdec1e0 100644 --- a/dbt/adapters/bigquery/relation_configs/__init__.py +++ b/dbt/adapters/bigquery/relation_configs/__init__.py @@ -1,20 +1,21 @@ -from dbt.adapters.bigquery.relation_configs.cluster import ( +from dbt.adapters.bigquery.relation_configs._base import BigQueryBaseRelationConfig +from dbt.adapters.bigquery.relation_configs._cluster import ( BigQueryClusterConfig, BigQueryClusterConfigChange, ) -from dbt.adapters.bigquery.relation_configs.materialized_view import ( +from dbt.adapters.bigquery.relation_configs._materialized_view import ( BigQueryMaterializedViewConfig, BigQueryMaterializedViewConfigChangeset, ) -from dbt.adapters.bigquery.relation_configs.options import ( +from dbt.adapters.bigquery.relation_configs._options import ( BigQueryOptionsConfig, BigQueryOptionsConfigChange, ) -from dbt.adapters.bigquery.relation_configs.partition import ( +from dbt.adapters.bigquery.relation_configs._partition import ( PartitionConfig, BigQueryPartitionConfigChange, ) -from dbt.adapters.bigquery.relation_configs.policies import ( +from dbt.adapters.bigquery.relation_configs._policies import ( 
BigQueryIncludePolicy, BigQueryQuotePolicy, ) diff --git a/dbt/adapters/bigquery/relation_configs/_base.py b/dbt/adapters/bigquery/relation_configs/_base.py index 92de1a854..c92f7c01a 100644 --- a/dbt/adapters/bigquery/relation_configs/_base.py +++ b/dbt/adapters/bigquery/relation_configs/_base.py @@ -6,7 +6,7 @@ from dbt.adapters.relation_configs import RelationConfigBase from google.cloud.bigquery import Table as BigQueryTable -from dbt.adapters.bigquery.relation_configs.policies import ( +from dbt.adapters.bigquery.relation_configs._policies import ( BigQueryIncludePolicy, BigQueryQuotePolicy, ) @@ -15,7 +15,7 @@ @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryRelationConfigBase(RelationConfigBase): +class BigQueryBaseRelationConfig(RelationConfigBase): @classmethod def include_policy(cls) -> Policy: return BigQueryIncludePolicy() @@ -25,10 +25,10 @@ def quote_policy(cls) -> Policy: return BigQueryQuotePolicy() @classmethod - def from_model_node(cls, model_node: ModelNode) -> "RelationConfigBase": + def from_model_node(cls, model_node: ModelNode) -> "BigQueryBaseRelationConfig": relation_config = cls.parse_model_node(model_node) relation = cls.from_dict(relation_config) - return relation + return relation # type: ignore @classmethod def parse_model_node(cls, model_node: ModelNode) -> dict: @@ -37,10 +37,10 @@ def parse_model_node(cls, model_node: ModelNode) -> dict: ) @classmethod - def from_bq_table(cls, table: BigQueryTable) -> "RelationConfigBase": + def from_bq_table(cls, table: BigQueryTable) -> "BigQueryBaseRelationConfig": relation_config = cls.parse_bq_table(table) relation = cls.from_dict(relation_config) - return relation + return relation # type: ignore @classmethod def parse_bq_table(cls, table: BigQueryTable) -> dict: diff --git a/dbt/adapters/bigquery/relation_configs/cluster.py b/dbt/adapters/bigquery/relation_configs/_cluster.py similarity index 91% rename from dbt/adapters/bigquery/relation_configs/cluster.py rename to 
dbt/adapters/bigquery/relation_configs/_cluster.py index ad5b7b2ed..c7161a96a 100644 --- a/dbt/adapters/bigquery/relation_configs/cluster.py +++ b/dbt/adapters/bigquery/relation_configs/_cluster.py @@ -1,15 +1,15 @@ from dataclasses import dataclass -from typing import Any, Dict, FrozenSet +from typing import Any, Dict, FrozenSet, Optional from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode from google.cloud.bigquery import Table as BigQueryTable -from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase +from dbt.adapters.bigquery.relation_configs._base import BigQueryBaseRelationConfig @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryClusterConfig(BigQueryRelationConfigBase): +class BigQueryClusterConfig(BigQueryBaseRelationConfig): """ This config manages table options supporting clustering. See the following for more information: - https://docs.getdbt.com/reference/resource-configs/bigquery-configs#using-table-partitioning-and-clustering @@ -47,7 +47,7 @@ def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: # type: ignore @dataclass(frozen=True, eq=True, unsafe_hash=True) class BigQueryClusterConfigChange(RelationConfigChange): - context: BigQueryClusterConfig + context: Optional[BigQueryClusterConfig] @property def requires_full_refresh(self) -> bool: diff --git a/dbt/adapters/bigquery/relation_configs/materialized_view.py b/dbt/adapters/bigquery/relation_configs/_materialized_view.py similarity index 89% rename from dbt/adapters/bigquery/relation_configs/materialized_view.py rename to dbt/adapters/bigquery/relation_configs/_materialized_view.py index 51351aabf..a9baa3ab0 100644 --- a/dbt/adapters/bigquery/relation_configs/materialized_view.py +++ b/dbt/adapters/bigquery/relation_configs/_materialized_view.py @@ -5,20 +5,23 @@ from dbt.contracts.relation import ComponentName from google.cloud.bigquery import Table as BigQueryTable -from 
dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase -from dbt.adapters.bigquery.relation_configs.options import ( +from dbt.adapters.bigquery.relation_configs._base import BigQueryBaseRelationConfig +from dbt.adapters.bigquery.relation_configs._options import ( BigQueryOptionsConfig, BigQueryOptionsConfigChange, ) -from dbt.adapters.bigquery.relation_configs.partition import PartitionConfig -from dbt.adapters.bigquery.relation_configs.cluster import ( +from dbt.adapters.bigquery.relation_configs._partition import ( + BigQueryPartitionConfigChange, + PartitionConfig, +) +from dbt.adapters.bigquery.relation_configs._cluster import ( BigQueryClusterConfig, BigQueryClusterConfigChange, ) @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryMaterializedViewConfig(BigQueryRelationConfigBase): +class BigQueryMaterializedViewConfig(BigQueryBaseRelationConfig): """ This config follow the specs found here: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_materialized_view_statement @@ -101,6 +104,7 @@ def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: @dataclass class BigQueryMaterializedViewConfigChangeset: options: Optional[BigQueryOptionsConfigChange] = None + partition: Optional[BigQueryPartitionConfigChange] = None cluster: Optional[BigQueryClusterConfigChange] = None @property @@ -108,6 +112,7 @@ def requires_full_refresh(self) -> bool: return any( { self.options.requires_full_refresh if self.options else False, + self.partition.requires_full_refresh if self.partition else False, self.cluster.requires_full_refresh if self.cluster else False, } ) @@ -117,6 +122,7 @@ def has_changes(self) -> bool: return any( { self.options if self.options else False, + self.partition if self.partition else False, self.cluster if self.cluster else False, } ) diff --git a/dbt/adapters/bigquery/relation_configs/options.py b/dbt/adapters/bigquery/relation_configs/_options.py similarity index 
98% rename from dbt/adapters/bigquery/relation_configs/options.py rename to dbt/adapters/bigquery/relation_configs/_options.py index 758740c50..51774e3fb 100644 --- a/dbt/adapters/bigquery/relation_configs/options.py +++ b/dbt/adapters/bigquery/relation_configs/_options.py @@ -6,12 +6,12 @@ from dbt.contracts.graph.nodes import ModelNode from google.cloud.bigquery import Table as BigQueryTable -from dbt.adapters.bigquery.relation_configs._base import BigQueryRelationConfigBase +from dbt.adapters.bigquery.relation_configs._base import BigQueryBaseRelationConfig from dbt.adapters.bigquery.utility import bool_setting, float_setting, sql_escape @dataclass(frozen=True, eq=True, unsafe_hash=True) -class BigQueryOptionsConfig(BigQueryRelationConfigBase): +class BigQueryOptionsConfig(BigQueryBaseRelationConfig): """ This config manages materialized view options. See the following for more information: - https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#materialized_view_option_list diff --git a/dbt/adapters/bigquery/relation_configs/partition.py b/dbt/adapters/bigquery/relation_configs/_partition.py similarity index 85% rename from dbt/adapters/bigquery/relation_configs/partition.py rename to dbt/adapters/bigquery/relation_configs/_partition.py index cd57719db..094e4f1c4 100644 --- a/dbt/adapters/bigquery/relation_configs/partition.py +++ b/dbt/adapters/bigquery/relation_configs/_partition.py @@ -1,10 +1,10 @@ from dataclasses import dataclass from typing import Any, Dict, List, Optional +from dbt.adapters.relation_configs import RelationConfigChange from dbt.contracts.graph.nodes import ModelNode from dbt.dataclass_schema import dbtClassMixin, ValidationError import dbt.exceptions -from dbt.adapters.relation_configs import RelationConfigChange from google.cloud.bigquery.table import Table as BigQueryTable @@ -104,13 +104,26 @@ def parse(cls, raw_partition_by) -> Optional["PartitionConfig"]: def parse_model_node(cls, model_node: 
ModelNode) -> Dict[str, Any]: """ Parse model node into a raw config for `PartitionConfig.parse` + + - Note: + This doesn't currently collect `time_ingestion_partitioning` and `copy_partitions` + because this was built for materialized views, which do not support those settings. """ - return model_node.config.extra.get("partition_by") + config_dict = model_node.config.extra.get("partition_by") + if "time_ingestion_partitioning" in config_dict: + del config_dict["time_ingestion_partitioning"] + if "copy_partitions" in config_dict: + del config_dict["copy_partitions"] + return config_dict @classmethod def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: """ - Parse the results of a BQ Table object into a raw config for `PartitionConfig.parse` + Parse the BQ Table object into a raw config for `PartitionConfig.parse` + + - Note: + This doesn't currently collect `time_ingestion_partitioning` and `copy_partitions` + because this was built for materialized views, which do not support those settings. """ if time_partitioning := table.time_partitioning: field_types = {field.name: field.field_type.lower() for field in table.schema} @@ -136,26 +149,10 @@ def parse_bq_table(cls, table: BigQueryTable) -> Dict[str, Any]: return config_dict - def __eq__(self, other: Any) -> bool: - """ - We can't query partitions on materialized views, hence we are assuming that if the field and data type - have not changed, then the partition has not changed either. This should be updated to include the - granularity and range once that issue is resolved. Until then, users will need to supply --full-refresh - if they keep the field but change the partition granularity. 
- """ - if isinstance(other, PartitionConfig): - return all( - { - other.field == self.field, - other.data_type == self.data_type, - } - ) - return False - @dataclass(frozen=True, eq=True, unsafe_hash=True) class BigQueryPartitionConfigChange(RelationConfigChange): - context: PartitionConfig + context: Optional[PartitionConfig] @property def requires_full_refresh(self) -> bool: diff --git a/dbt/adapters/bigquery/relation_configs/policies.py b/dbt/adapters/bigquery/relation_configs/_policies.py similarity index 100% rename from dbt/adapters/bigquery/relation_configs/policies.py rename to dbt/adapters/bigquery/relation_configs/_policies.py diff --git a/dbt/include/bigquery/macros/relation_components/cluster/describe.sql b/dbt/include/bigquery/macros/relation_components/cluster/describe.sql deleted file mode 100644 index 43d1eeb9b..000000000 --- a/dbt/include/bigquery/macros/relation_components/cluster/describe.sql +++ /dev/null @@ -1,15 +0,0 @@ -{% macro bigquery__get_describe_cluster_sql(relation) %} - select - column_name - from {{ relation.information_schema('COLUMNS') }} - where table_name = '{{ relation.identifier }}' - and table_schema = '{{ relation.schema }}' - and table_catalog = '{{ relation.database }}' - and clustering_ordinal_position is not null -{% endmacro %} - - -{% macro bigquery__describe_cluster(relation) %} - {%- set _sql = bigquery__get_describe_cluster_sql(relation) -%} - {% do return(run_query(_sql)) %} -{% endmacro %} diff --git a/dbt/include/bigquery/macros/relation_components/options/describe.sql b/dbt/include/bigquery/macros/relation_components/options/describe.sql deleted file mode 100644 index 55dbfa947..000000000 --- a/dbt/include/bigquery/macros/relation_components/options/describe.sql +++ /dev/null @@ -1,15 +0,0 @@ -{% macro bigquery__get_describe_options_sql(relation) %} - select - option_name, - option_value - from {{ relation.information_schema('TABLE_OPTIONS') }} - where table_name = '{{ relation.identifier }}' - and table_schema 
= '{{ relation.schema }}' - and table_catalog = '{{ relation.database }}' -{% endmacro %} - - -{% macro bigquery__describe_options(relation) %} - {%- set _sql = bigquery__get_describe_options_sql(relation) -%} - {% do return(run_query(_sql)) %} -{% endmacro %} diff --git a/dbt/include/bigquery/macros/relation_components/partition/describe.sql b/dbt/include/bigquery/macros/relation_components/partition/describe.sql deleted file mode 100644 index 1efdd56a1..000000000 --- a/dbt/include/bigquery/macros/relation_components/partition/describe.sql +++ /dev/null @@ -1,42 +0,0 @@ -{% macro bigquery__get_describe_partition_sql(relation) %} - with max_partition_id as ( - select - table_name, - table_schema, - table_catalog, - max(partition_id) as partition_id - from {{ relation.information_schema('PARTITIONS') }} - where table_name = '{{ relation.identifier }}' - and table_schema = '{{ relation.schema }}' - and table_catalog = '{{ relation.database }}' - group by - table_name, - table_schema, - table_catalog - ) - - select - c.column_name as partition_column_name, - c.data_type as partition_data_type, - case - when regexp_contains(p.partition_id, '^[0-9]{4}$') THEN 'year' - when regexp_contains(p.partition_id, '^[0-9]{6}$') THEN 'month' - when regexp_contains(p.partition_id, '^[0-9]{8}$') THEN 'day' - when regexp_contains(p.partition_id, '^[0-9]{10}$') THEN 'hour' - end as partition_type - from {{ relation.information_schema('COLUMNS') }} c - left join max_partition_id p - on p.table_name = c.table_name - and p.table_schema = c.table_schema - and p.table_catalog = c.table_catalog - where c.table_name = '{{ relation.identifier }}' - and c.table_schema = '{{ relation.schema }}' - and c.table_catalog = '{{ relation.database }}' - and c.is_partitioning_column = 'YES' -{% endmacro %} - - -{% macro bigquery__describe_partition(relation) %} - {% set bq_relation = adapter.connections.get_bq_table(relation.database, relation.schema, relation.identifier) %} - {% do return(bq_relation) 
%} -{% endmacro %} From b5c19d93d5ae90f9a8b997a6a02b76ebf124188b Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Oct 2023 16:10:02 -0400 Subject: [PATCH 52/53] delete empty file --- .../bigquery/macros/relation_components/partition/describe.sql | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 dbt/include/bigquery/macros/relation_components/partition/describe.sql diff --git a/dbt/include/bigquery/macros/relation_components/partition/describe.sql b/dbt/include/bigquery/macros/relation_components/partition/describe.sql deleted file mode 100644 index e69de29bb..000000000 From b134fb3f8f110e68941d0f8a98b47ca70b371cc0 Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 11 Oct 2023 18:10:18 -0400 Subject: [PATCH 53/53] add MV tests for cluster and partition alone, update combined tests to perform all checks --- .../test_materialized_view_changes.py | 10 +- .../test_materialized_view_cluster_changes.py | 69 ++++++++++++++ ...est_materialized_view_partition_changes.py | 93 +++++++++++++++++++ 3 files changed, 167 insertions(+), 5 deletions(-) create mode 100644 tests/functional/adapter/materialized_view_tests/test_materialized_view_cluster_changes.py create mode 100644 tests/functional/adapter/materialized_view_tests/test_materialized_view_partition_changes.py diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py index d2df9735e..c821c68fc 100644 --- a/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view_changes.py @@ -38,9 +38,6 @@ def check_state_alter_change_is_applied(project, materialized_view): # these change when run manually assert results.options.enable_refresh is False assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default - # this does not change when run manually - # in 
fact, it doesn't even show up in the DDL whereas the other two do - assert results.options.max_staleness is None @staticmethod def change_config_via_replace(project, materialized_view): @@ -63,9 +60,9 @@ def change_config_via_replace(project, materialized_view): } },""" new_model = ( - initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') - .replace(old_partition, new_partition) + initial_model.replace(old_partition, new_partition) .replace("'my_base_table'", "'my_other_base_table'") + .replace('cluster_by=["id", "value"]', 'cluster_by="id"') ) set_model_file(project, materialized_view, new_model) @@ -74,6 +71,9 @@ def check_state_replace_change_is_applied(project, materialized_view): with get_connection(project.adapter): results = project.adapter.describe_relation(materialized_view) assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.partition.field == "value" + assert results.partition.data_type == "int64" + assert results.partition.range == {"start": 0, "end": 500, "interval": 50} assert results.cluster.fields == frozenset({"id"}) diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_cluster_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_cluster_changes.py new file mode 100644 index 000000000..74e174d4f --- /dev/null +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view_cluster_changes.py @@ -0,0 +1,69 @@ +from dbt.tests.adapter.materialized_view.changes import ( + MaterializedViewChanges, + MaterializedViewChangesApplyMixin, + MaterializedViewChangesContinueMixin, + MaterializedViewChangesFailMixin, +) +from dbt.tests.util import get_connection, get_model_file, set_model_file + +from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig + +from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin + + +class BigQueryMaterializedViewClusterChanges( + 
BigQueryMaterializedViewMixin, MaterializedViewChanges +): + @staticmethod + def check_start_state(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.options.enable_refresh is True + assert results.options.refresh_interval_minutes == 60 + assert results.cluster.fields == frozenset({"id", "value"}) + + @staticmethod + def change_config_via_alter(project, materialized_view): + initial_model = get_model_file(project, materialized_view) + new_model = initial_model.replace("enable_refresh=True", "enable_refresh=False") + set_model_file(project, materialized_view, new_model) + + @staticmethod + def check_state_alter_change_is_applied(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.options.enable_refresh is False + assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default + + @staticmethod + def change_config_via_replace(project, materialized_view): + initial_model = get_model_file(project, materialized_view) + new_model = initial_model.replace('cluster_by=["id", "value"]', 'cluster_by="id"') + set_model_file(project, materialized_view, new_model) + + @staticmethod + def check_state_replace_change_is_applied(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.cluster.fields == frozenset({"id"}) + + +class TestBigQueryMaterializedViewClusterChangesApply( + BigQueryMaterializedViewClusterChanges, MaterializedViewChangesApplyMixin +): + pass + + +class TestBigQueryMaterializedViewClusterChangesContinue( + BigQueryMaterializedViewClusterChanges, 
MaterializedViewChangesContinueMixin +): + pass + + +class TestBigQueryMaterializedViewClusterChangesFail( + BigQueryMaterializedViewClusterChanges, MaterializedViewChangesFailMixin +): + pass diff --git a/tests/functional/adapter/materialized_view_tests/test_materialized_view_partition_changes.py b/tests/functional/adapter/materialized_view_tests/test_materialized_view_partition_changes.py new file mode 100644 index 000000000..7f396ae1b --- /dev/null +++ b/tests/functional/adapter/materialized_view_tests/test_materialized_view_partition_changes.py @@ -0,0 +1,93 @@ +from dbt.tests.adapter.materialized_view.changes import ( + MaterializedViewChanges, + MaterializedViewChangesApplyMixin, + MaterializedViewChangesContinueMixin, + MaterializedViewChangesFailMixin, +) +from dbt.tests.util import get_connection, get_model_file, set_model_file + +from dbt.adapters.bigquery.relation_configs import BigQueryMaterializedViewConfig + +from tests.functional.adapter.materialized_view_tests._mixin import BigQueryMaterializedViewMixin + + +class BigQueryMaterializedViewPartitionChanges( + BigQueryMaterializedViewMixin, MaterializedViewChanges +): + @staticmethod + def check_start_state(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.options.enable_refresh is True + assert results.options.refresh_interval_minutes == 60 + assert results.partition.field == "record_valid_date" + assert results.partition.data_type == "datetime" + assert results.partition.granularity == "day" + + @staticmethod + def change_config_via_alter(project, materialized_view): + initial_model = get_model_file(project, materialized_view) + new_model = initial_model.replace("enable_refresh=True", "enable_refresh=False") + set_model_file(project, materialized_view, new_model) + + @staticmethod + def check_state_alter_change_is_applied(project, 
materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + # these change when run manually + assert results.options.enable_refresh is False + assert results.options.refresh_interval_minutes == 30 # BQ returns it to the default + + @staticmethod + def change_config_via_replace(project, materialized_view): + initial_model = get_model_file(project, materialized_view) + # the whitespace to the left on partition matters here + old_partition = """ + partition_by={ + "field": "record_valid_date", + "data_type": "datetime", + "granularity": "day" + },""" + new_partition = """ + partition_by={ + "field": "value", + "data_type": "int64", + "range": { + "start": 0, + "end": 500, + "interval": 50 + } + },""" + new_model = initial_model.replace(old_partition, new_partition).replace( + "'my_base_table'", "'my_other_base_table'" + ) + set_model_file(project, materialized_view, new_model) + + @staticmethod + def check_state_replace_change_is_applied(project, materialized_view): + with get_connection(project.adapter): + results = project.adapter.describe_relation(materialized_view) + assert isinstance(results, BigQueryMaterializedViewConfig) + assert results.partition.field == "value" + assert results.partition.data_type == "int64" + assert results.partition.range == {"start": 0, "end": 500, "interval": 50} + + +class TestBigQueryMaterializedViewPartitionChangesApply( + BigQueryMaterializedViewPartitionChanges, MaterializedViewChangesApplyMixin +): + pass + + +class TestBigQueryMaterializedViewPartitionChangesContinue( + BigQueryMaterializedViewPartitionChanges, MaterializedViewChangesContinueMixin +): + pass + + +class TestBigQueryMaterializedViewPartitionChangesFail( + BigQueryMaterializedViewPartitionChanges, MaterializedViewChangesFailMixin +): + pass