From 0ce4857c0200156380fd7ce024edfd953cde5dd2 Mon Sep 17 00:00:00 2001 From: Keegan Date: Thu, 16 May 2024 03:14:43 +0000 Subject: [PATCH] Some workflow fixes --- .../onix_workflow/onix_workflow.py | 2 +- ...book_product_metrics_altmetrics_pilot.json | 34 ++++++++----------- ...book_product_metrics_amazon_ltd_pilot.json | 34 ++++++++----------- .../book_product_metrics_amazon_pilot.json | 34 ++++++++----------- ...roduct_metrics_internet_archive_pilot.json | 34 ++++++++----------- ...duct_metrics_scholarcommons_ltd_pilot.json | 34 ++++++++----------- ..._metrics_scholarspace_downloads_pilot.json | 34 ++++++++----------- ...duct_metrics_scholarspace_views_pilot.json | 34 ++++++++----------- .../book_product_metrics_sci_open_pilot.json | 34 ++++++++----------- ...k_product_body_altmetrics_pilot.sql.jinja2 | 6 ++-- ..._product_body_amazon_ltd_pilot.sql.jinja2} | 6 ++-- .../book_product_body_amazon_pilot.sql.jinja2 | 6 ++-- ...uct_body_internet_archive_pilot.sql.jinja2 | 6 ++-- ...t_body_scholarcommons_ltd_pilot.sql.jinja2 | 6 ++-- ...dy_scholarspace_downloads_pilot.sql.jinja2 | 6 ++-- ...t_body_scholarspace_views_pilot.sql.jinja2 | 6 ++-- ...ook_product_body_sci_open_pilot.sql.jinja2 | 6 ++-- 17 files changed, 137 insertions(+), 185 deletions(-) rename dags/oaebu_workflows/sql/shmp_pilot/{book_product_body_amazon_ltd.sql.jinja => book_product_body_amazon_ltd_pilot.sql.jinja2} (62%) diff --git a/dags/oaebu_workflows/onix_workflow/onix_workflow.py b/dags/oaebu_workflows/onix_workflow/onix_workflow.py index 1d2b9c93..5d58d232 100644 --- a/dags/oaebu_workflows/onix_workflow/onix_workflow.py +++ b/dags/oaebu_workflows/onix_workflow/onix_workflow.py @@ -19,7 +19,7 @@ import logging import os import re -from typing import Iterable, List, Optional, Tuple, Union +from typing import Iterable, List, Optional, Tuple, Union, Dict import jsonlines import pendulum diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_altmetrics_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_altmetrics_pilot.json index a2d5cfbf..ad5fee01 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_altmetrics_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_altmetrics_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "altmetrics_pilot", + "type": "RECORD", + "description": "Metrics derived from altmetrics" +} diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_ltd_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_ltd_pilot.json index a2d5cfbf..ea089c67 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_ltd_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_ltd_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "amazon_ltd_pilot", + "type": "RECORD", + "description": "Metrics derived from amazon_ltd" +} diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_pilot.json index a2d5cfbf..6dc1481f 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_amazon_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "amazon_pilot", + "type": "RECORD", + "description": "Metrics derived from amazon" +} diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_internet_archive_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_internet_archive_pilot.json index a2d5cfbf..81a73a1a 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_internet_archive_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_internet_archive_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "internet_archive_pilot", + "type": "RECORD", + "description": "Metrics derived from internet_archive" +} diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarcommons_ltd_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarcommons_ltd_pilot.json index a2d5cfbf..1fbadd74 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarcommons_ltd_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarcommons_ltd_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "scholarcommons_ltd_pilot", + "type": "RECORD", + "description": "Metrics derived from scholarcommons_ltd" +} diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_downloads_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_downloads_pilot.json index a2d5cfbf..2aeeeac4 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_downloads_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_downloads_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "scholarspace_downloads_pilot", + "type": "RECORD", + "description": "Metrics derived from scholarspace_downloads" +} diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_views_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_views_pilot.json index a2d5cfbf..3308b57c 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_views_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_scholarspace_views_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "scholarspace_views_pilot", + "type": "RECORD", + "description": "Metrics derived from scholarspace_views" +} diff --git a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_sci_open_pilot.json b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_sci_open_pilot.json index a2d5cfbf..e903048d 100644 --- a/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_sci_open_pilot.json +++ b/dags/oaebu_workflows/schema/shmp_pilot/book_product_metrics_sci_open_pilot.json @@ -1,20 +1,14 @@ -[ - { - "mode": "REQUIRED", - "name": "ISBN13", - "type": "STRING", - "description": "13 Digit ISBN of the book." - }, - { - "mode": "REQUIRED", - "name": "value", - "type": "INTEGER", - "description": "Measured value." - }, - { - "mode": "REQUIRED", - "name": "release_date", - "type": "DATE", - "description": "The end date of the release month" - } -] +{ + "fields": [ + { + "mode": "NULLABLE", + "name": "value", + "type": "INTEGER", + "description": "Metric value" + } + ], + "mode": "NULLABLE", + "name": "sci_open_pilot", + "type": "RECORD", + "description": "Metrics derived from sci_open" +} diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics_pilot.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics_pilot.sql.jinja2 index 0d324c2a..e7c1a713 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics_pilot.sql.jinja2 +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -altmetrics_metrics as ( +altmetrics_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ altmetrics_table_id }}` + `{{ altmetrics_pilot_table_id }}` GROUP BY ISBN13, release_date -), +) diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd.sql.jinja b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd_pilot.sql.jinja2 similarity index 62% rename from dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd.sql.jinja rename to dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd_pilot.sql.jinja2 index 64115318..9cd46f93 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd.sql.jinja +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -amazon_ltd_metrics as ( +amazon_ltd_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ amazon_ltd_table_id }}` + `{{ amazon_ltd_pilot_table_id }}` GROUP BY ISBN13, release_date -), +) diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_pilot.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_pilot.sql.jinja2 index eaf7e267..ade7bd9e 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_pilot.sql.jinja2 +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -amazon_metrics as ( +amazon_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ amazon_table_id }}` + `{{ amazon_pilot_table_id }}` GROUP BY ISBN13, release_date -), +) diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive_pilot.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive_pilot.sql.jinja2 index fc4b945e..8490c6b2 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive_pilot.sql.jinja2 +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -internet_archive_metrics as ( +internet_archive_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ internet_archive_table_id }}` + `{{ internet_archive_pilot_table_id }}` GROUP BY ISBN13, release_date -), +) diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd_pilot.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd_pilot.sql.jinja2 index 957a8ad4..2f71647f 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd_pilot.sql.jinja2 +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -scholarcommons_ltd_metrics as ( +scholarcommons_ltd_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ scholarcommons_ltd_table_id }}` + `{{ scholarcommons_ltd_pilot_table_id }}` GROUP BY ISBN13, release_date -), +) diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads_pilot.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads_pilot.sql.jinja2 index 70065785..fb16f256 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads_pilot.sql.jinja2 +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -scholarspace_downloads_metrics as ( +scholarspace_downloads_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ scholarspace_downloads_table_id }}` + `{{ scholarspace_downloads_pilot_table_id }}` GROUP BY ISBN13, release_date -), +) diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views_pilot.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views_pilot.sql.jinja2 index 9bf0890a..cb084e02 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views_pilot.sql.jinja2 +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -scholarspace_views_metrics as ( +scholarspace_views_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ scholarspace_views_table_id }}` + `{{ scholarspace_views_pilot_table_id }}` GROUP BY ISBN13, release_date -), +) diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open_pilot.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open_pilot.sql.jinja2 index 0d324c2a..e1b21be1 100644 --- a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open_pilot.sql.jinja2 +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open_pilot.sql.jinja2 @@ -1,11 +1,11 @@ -altmetrics_metrics as ( +sci_open_pilot_metrics as ( SELECT ISBN13, release_date, value, FROM - `{{ altmetrics_table_id }}` + `{{ sci_open_pilot_table_id }}` GROUP BY ISBN13, release_date -), +)