def get_path_or_empty(parent_object, path_array, zero_value=''):
    """Follow ``path_array`` down through nested containers.

    Args:
        parent_object: Root container to start from (e.g. a nested dict).
        path_array: Keys to follow, outermost first. An empty path returns
            ``parent_object`` itself.
        zero_value: Value returned when the path cannot be followed.

    Returns:
        The value stored at the end of the path, or ``zero_value`` when any
        step is missing or lands on a value that cannot be indexed.
    """
    obj = parent_object
    for el in path_array:
        # Index directly instead of testing ``el in obj`` first: a membership
        # test raises TypeError on None/numbers and does substring matching on
        # strings, so an unexpected intermediate value used to crash the
        # lookup instead of yielding the fallback.
        try:
            obj = obj[el]
        except (KeyError, IndexError, TypeError):
            return zero_value

    return obj
# Sentinel reported when a numeric statistic cannot be found for a model.
NUMERIC_ZERO_VALUE = -1


def _first_stat_value(catalog_nodes, model_name, stat_keys):
    """Return the first ``stats.<key>.value`` found for ``model_name``.

    Each key in ``stat_keys`` is tried in order; a key counts as found when
    the lookup yields anything other than ``NUMERIC_ZERO_VALUE``.
    """
    for key in stat_keys:
        value = get_path_or_empty(
            catalog_nodes,
            [model_name, 'stats', key, 'value'],
            NUMERIC_ZERO_VALUE,
        )
        if value != NUMERIC_ZERO_VALUE:
            return value

    return NUMERIC_ZERO_VALUE


def get_num_rows(catalog_nodes, model_name):
    """Return the row-count statistic recorded for ``model_name``.

    Looks under ``stats.num_rows.value`` first, then ``stats.row_count.value``.

    Returns:
        The stored value, or ``NUMERIC_ZERO_VALUE`` when neither key exists.
    """
    return _first_stat_value(catalog_nodes, model_name, ('num_rows', 'row_count'))


def get_bytes(catalog_nodes, model_name):
    """Return the size-in-bytes statistic recorded for ``model_name``.

    Looks under ``stats.num_bytes.value`` first, then ``stats.bytes.value``.

    Returns:
        The stored value, or ``NUMERIC_ZERO_VALUE`` when neither key exists.
    """
    # NOTE(review): main() divides this result by 1e9 for "Approx GB", so a
    # missing statistic is reported as -1e-9 rather than -1. Consider checking
    # for the sentinel at the call site before scaling.
    return _first_stat_value(catalog_nodes, model_name, ('num_bytes', 'bytes'))


def get_owner(data, catalog_nodes, model_name):
    """Return the owner to display for a model.

    Args:
        data: Model record; ``data['config']['meta']['owner']`` is checked
            first.
        catalog_nodes: Mapping of model name to catalog entry; used for the
            fallback ``metadata.owner`` lookup.
        model_name: Key of the model inside ``catalog_nodes``.

    Returns:
        The owner named explicitly in the dbt config when present, otherwise
        the owner recorded in the catalog metadata, otherwise ``''``.
    """
    # Check for an owner field explicitly named in the dbt config.
    owner = get_path_or_empty(data, ['config', 'meta', 'owner'], None)
    if owner is not None:
        return owner

    # If none is present, fall back to the database table owner.
    return get_path_or_empty(catalog_nodes, [model_name, 'metadata', 'owner'], '')
[model_name, 'metadata', 'owner'], '') + get_owner(data, catalog_nodes, model_name) )[:2000] } } @@ -394,10 +427,10 @@ def main(): ] }, "Approx Rows": { - "number": get_path_or_empty(catalog_nodes, [model_name, 'stats', 'num_rows', 'value'], -1) + "number": get_num_rows(catalog_nodes, model_name) }, "Approx GB": { - "number": get_path_or_empty(catalog_nodes, [model_name, 'stats', 'num_bytes', 'value'], -1) /1e9 + "number":get_bytes(catalog_nodes, model_name) /1e9 }, "Depends On": { "rich_text": [