From 4a604647e11ae93a793c98e0ade2bd4ae63bd9ed Mon Sep 17 00:00:00 2001 From: keegansmith21 Date: Tue, 14 May 2024 16:53:45 +0800 Subject: [PATCH] Partial implementation of sql and schemas --- .../schema/shmp_pilot/generic.json | 26 ++++ .../schema/shmp_pilot/muse.json | 122 ++++++++++++++++++ .../shmp_pilot/book_metrics_altmetrics.sql | 1 + .../sql/shmp_pilot/book_metrics_amazon.sql | 1 + .../shmp_pilot/book_metrics_amazon_ltd.sql | 1 + .../book_metrics_internet_archive.sql | 1 + .../book_metrics_scholarcommons_ltd.sql | 1 + .../book_metrics_scholarspace_downloads.sql | 1 + .../book_metrics_scholarspace_views.sql | 1 + .../sql/shmp_pilot/book_metrics_sci_open.sql | 1 + .../book_product_body_altmetrics.sql.jinja2 | 11 ++ .../book_product_body_amazon.sql.jinja2 | 11 ++ .../book_product_body_amazon_ltd.sql.jinja | 11 ++ ...k_product_body_internet_archive.sql.jinja2 | 11 ++ ...product_body_scholarcommons_ltd.sql.jinja2 | 11 ++ ...uct_body_scholarspace_downloads.sql.jinja2 | 11 ++ ...product_body_scholarspace_views.sql.jinja2 | 11 ++ .../book_product_body_sci_open.sql.jinja2 | 11 ++ .../sql/shmp_pilot/month_null_altmetrics.sql | 1 + .../sql/shmp_pilot/month_null_amazon.sql | 1 + .../sql/shmp_pilot/month_null_amazon_ltd.sql | 1 + .../month_null_internet_archive.sql | 1 + .../month_null_scholarcommons_ltd.sql | 0 .../month_null_scholarspace_commons_ltd.sql | 1 + .../month_null_scholarspace_downloads.sql | 1 + .../month_null_scholarspace_views.sql | 1 + .../sql/shmp_pilot/month_null_sci_open.sql | 1 + 27 files changed, 252 insertions(+) create mode 100644 dags/oaebu_workflows/schema/shmp_pilot/generic.json create mode 100644 dags/oaebu_workflows/schema/shmp_pilot/muse.json create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_metrics_altmetrics.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon_ltd.sql create mode 100644 
dags/oaebu_workflows/sql/shmp_pilot/book_metrics_internet_archive.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarcommons_ltd.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_downloads.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_views.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_metrics_sci_open.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics.sql.jinja2 create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon.sql.jinja2 create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd.sql.jinja create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive.sql.jinja2 create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd.sql.jinja2 create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads.sql.jinja2 create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views.sql.jinja2 create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open.sql.jinja2 create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_altmetrics.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon_ltd.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_internet_archive.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarcommons_ltd.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_commons_ltd.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_downloads.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_views.sql create mode 100644 dags/oaebu_workflows/sql/shmp_pilot/month_null_sci_open.sql diff --git 
a/dags/oaebu_workflows/schema/shmp_pilot/generic.json b/dags/oaebu_workflows/schema/shmp_pilot/generic.json new file mode 100644 index 00000000..6a873c97 --- /dev/null +++ b/dags/oaebu_workflows/schema/shmp_pilot/generic.json @@ -0,0 +1,26 @@ +[ + { + "mode": "REQUIRED", + "name": "ISBN13", + "type": "STRING", + "description": "13 Digit ISBN of the book." + }, + { + "mode": "REQUIRED", + "name": "month", + "type": "DATE", + "description": "Start date for period of analytics info." + }, + { + "mode": "REQUIRED", + "name": "value", + "type": "INTEGER", + "description": "Measured value." + }, + { + "mode": "REQUIRED", + "name": "release_date", + "type": "DATE", + "description": "The end date of the release month" + } +] diff --git a/dags/oaebu_workflows/schema/shmp_pilot/muse.json b/dags/oaebu_workflows/schema/shmp_pilot/muse.json new file mode 100644 index 00000000..7a6e3268 --- /dev/null +++ b/dags/oaebu_workflows/schema/shmp_pilot/muse.json @@ -0,0 +1,122 @@ +[ + { + "description": "The id of the book.", + "mode": "NULLABLE", + "name": "ID", + "type": "STRING" + }, + { + "description": "Publication year.", + "mode": "NULLABLE", + "name": "YEAR", + "type": "INTEGER" + }, + { + "description": "Publication month.", + "mode": "NULLABLE", + "name": "MONTH", + "type": "INTEGER" + }, + { + "description": "Resource type.", + "mode": "NULLABLE", + "name": "RESOURCE_TYPE", + "type": "STRING" + }, + { + "description": "Resource id.", + "mode": "NULLABLE", + "name": "RESOURCE_ID", + "type": "STRING" + }, + { + "description": "ISBN of the book on MUSE.", + "mode": "NULLABLE", + "name": "ISBN", + "type": "STRING" + }, + { + "description": "Title of the book.", + "mode": "NULLABLE", + "name": "RESOURCE", + "type": "STRING" + }, + { + "description": "URL of the book.", + "mode": "NULLABLE", + "name": "RESOURCE_URL", + "type": "STRING" + }, + { + "description": "Date of launch.", + "mode": "NULLABLE", + "name": "RESOURCE_LAUNCH", + "type": "DATE" + }, + { + "description": "Author 
of the book.", + "mode": "NULLABLE", + "name": "AUTHOR", + "type": "STRING" + }, + { + "description": "Title of the chapter.", + "mode": "NULLABLE", + "name": "FULLTEXT_TITLE", + "type": "STRING" + }, + { + "description": "URL of the chapter.", + "mode": "NULLABLE", + "name": "FULLTEXT_URL", + "type": "STRING" + }, + { + "description": "Date of fulltext launch.", + "mode": "NULLABLE", + "name": "FULLTEXT_LAUNCH", + "type": "DATE" + }, + { + "description": "Issue.", + "mode": "NULLABLE", + "name": "ISSUE", + "type": "STRING" + }, + { + "description": "Format.", + "mode": "NULLABLE", + "name": "FORMAT", + "type": "STRING" + }, + { + "description": "Access type.", + "mode": "NULLABLE", + "name": "ACCESS", + "type": "STRING" + }, + { + "description": "Country Name.", + "mode": "NULLABLE", + "name": "COUNTRY", + "type": "STRING" + }, + { + "description": "Institution name.", + "mode": "NULLABLE", + "name": "INSTITUTION", + "type": "STRING" + }, + { + "description": "Number of requests.", + "mode": "NULLABLE", + "name": "REQUESTS", + "type": "INTEGER" + }, + { + "mode": "REQUIRED", + "name": "release_date", + "type": "DATE", + "description": "Last day of the release month. Table is partitioned on this column." 
+ } +] diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_altmetrics.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_altmetrics.sql new file mode 100644 index 00000000..5a78c299 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_altmetrics.sql @@ -0,0 +1 @@ +STRUCT(month.altmetrics.total_downloads) AS altmetrics diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon.sql new file mode 100644 index 00000000..ba100562 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon.sql @@ -0,0 +1 @@ +STRUCT(month.amazon.total_downloads) AS amazon diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon_ltd.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon_ltd.sql new file mode 100644 index 00000000..060b8f63 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_amazon_ltd.sql @@ -0,0 +1 @@ +STRUCT(month.amazon_ltd.total_downloads) AS amazon_ltd diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_internet_archive.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_internet_archive.sql new file mode 100644 index 00000000..37e566ca --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_internet_archive.sql @@ -0,0 +1 @@ +STRUCT(month.internet_archive.total_downloads) AS internet_archive diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarcommons_ltd.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarcommons_ltd.sql new file mode 100644 index 00000000..010e62db --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarcommons_ltd.sql @@ -0,0 +1 @@ +STRUCT(month.scholarcommons_ltd.total_downloads) AS scholarcommons_ltd diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_downloads.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_downloads.sql new file mode 100644 index 00000000..3d9cc312 --- /dev/null 
+++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_downloads.sql @@ -0,0 +1 @@ +STRUCT(month.scholarspace_downloads.total_downloads) AS scholarspace_downloads diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_views.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_views.sql new file mode 100644 index 00000000..011ccc75 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_scholarspace_views.sql @@ -0,0 +1 @@ +STRUCT(month.scholarspace_views.total_downloads) AS scholarspace_views diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_sci_open.sql b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_sci_open.sql new file mode 100644 index 00000000..b02e5fb8 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_metrics_sci_open.sql @@ -0,0 +1 @@ +STRUCT(month.sci_open.total_downloads) AS sci_open diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics.sql.jinja2 new file mode 100644 index 00000000..0d324c2a --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_altmetrics.sql.jinja2 @@ -0,0 +1,11 @@ +altmetrics_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ altmetrics_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon.sql.jinja2 new file mode 100644 index 00000000..eaf7e267 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon.sql.jinja2 @@ -0,0 +1,11 @@ +amazon_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ amazon_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd.sql.jinja b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd.sql.jinja new file mode 
100644 index 00000000..64115318 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_amazon_ltd.sql.jinja @@ -0,0 +1,11 @@ +amazon_ltd_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ amazon_ltd_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive.sql.jinja2 new file mode 100644 index 00000000..fc4b945e --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_internet_archive.sql.jinja2 @@ -0,0 +1,11 @@ +internet_archive_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ internet_archive_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd.sql.jinja2 new file mode 100644 index 00000000..957a8ad4 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarcommons_ltd.sql.jinja2 @@ -0,0 +1,11 @@ +scholarcommons_ltd_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ scholarcommons_ltd_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads.sql.jinja2 new file mode 100644 index 00000000..70065785 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_downloads.sql.jinja2 @@ -0,0 +1,11 @@ +scholarspace_downloads_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ scholarspace_downloads_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views.sql.jinja2 
b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views.sql.jinja2 new file mode 100644 index 00000000..9bf0890a --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_scholarspace_views.sql.jinja2 @@ -0,0 +1,11 @@ +scholarspace_views_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ scholarspace_views_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open.sql.jinja2 b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open.sql.jinja2 new file mode 100644 index 00000000..0d324c2a --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/book_product_body_sci_open.sql.jinja2 @@ -0,0 +1,11 @@ +sci_open_metrics as ( + SELECT + ISBN13, + release_date, + SUM(value) AS value, + FROM + `{{ sci_open_table_id }}` + GROUP BY + ISBN13, + release_date +), diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_altmetrics.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_altmetrics.sql new file mode 100644 index 00000000..93d7f7bf --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_altmetrics.sql @@ -0,0 +1 @@ +month.altmetrics IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon.sql new file mode 100644 index 00000000..04873f8b --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon.sql @@ -0,0 +1 @@ +month.amazon IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon_ltd.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon_ltd.sql new file mode 100644 index 00000000..0cf40c71 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_amazon_ltd.sql @@ -0,0 +1 @@ +month.amazon_ltd IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_internet_archive.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_internet_archive.sql new file mode 100644 index 00000000..70404cc9 --- 
/dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_internet_archive.sql @@ -0,0 +1 @@ +month.internet_archive IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarcommons_ltd.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarcommons_ltd.sql new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarcommons_ltd.sql @@ -0,0 +1 @@ +month.scholarcommons_ltd IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_commons_ltd.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_commons_ltd.sql new file mode 100644 index 00000000..b9703a00 --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_commons_ltd.sql @@ -0,0 +1 @@ +month.scholarcommons_ltd IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_downloads.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_downloads.sql new file mode 100644 index 00000000..d4c41eca --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_downloads.sql @@ -0,0 +1 @@ +month.scholarspace_downloads IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_views.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_views.sql new file mode 100644 index 00000000..03bfd08e --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_scholarspace_views.sql @@ -0,0 +1 @@ +month.scholarspace_views IS NOT NULL diff --git a/dags/oaebu_workflows/sql/shmp_pilot/month_null_sci_open.sql b/dags/oaebu_workflows/sql/shmp_pilot/month_null_sci_open.sql new file mode 100644 index 00000000..1272426f --- /dev/null +++ b/dags/oaebu_workflows/sql/shmp_pilot/month_null_sci_open.sql @@ -0,0 +1 @@ +month.sci_open IS NOT NULL