Skip to content

Commit

Permalink
Updated author fields to not exclude nulls
Browse files Browse the repository at this point in the history
  • Loading branch information
keegansmith21 committed May 22, 2024
1 parent 9c26e6c commit baf2ef4
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 37 deletions.
14 changes: 8 additions & 6 deletions dags/oaebu_workflows/onix_workflow/sql/book_list.sql.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ SELECT
(SELECT p.publisher_name as publisher_name FROM UNNEST(onix.publisher) as p WHERE p.publishing_role = "Publisher" LIMIT 1) as publisher_name,
onix.title,
onix.keywords,
STRUCT(
PersonName as person_name,
PersonNameInverted as person_name_inverted,
ORCID as ORCID
) as authors,
ARRAY(
SELECT STRUCT(
a.PersonName as person_name,
a.PersonNameInverted as person_name_inverted,
a.ORCID as ORCID
)
FROM UNNEST(onix.authors) as a) as author,
STRUCT(
onix.bic_subjects as bic,
onix.bisac_subjects as bisac,
onix.thema_subjects as thema
) as subjects
FROM `{{ book_product_table_id }}`, UNNEST(onix.authors)
FROM `{{ book_product_table_id }}`
14 changes: 8 additions & 6 deletions dags/oaebu_workflows/onix_workflow/sql/book_metrics.sql.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ SELECT
onix.title,
CAST(onix.published_year as INT64) as published_year,
onix.published_date as published_date,
STRUCT(
PersonName as person_name,
PersonNameInverted as person_name_inverted,
ORCID as ORCID
) as authors,
ARRAY(
SELECT STRUCT(
a.PersonName as person_name,
a.PersonNameInverted as person_name_inverted,
a.ORCID as ORCID
)
FROM UNNEST(onix.authors) as a) as author,
STRUCT(
onix.bic_subjects as bic,
onix.bisac_subjects as bisac,
Expand All @@ -40,7 +42,7 @@ SELECT
{% include dp.files.book_metrics_sql %},
{% endfor %}
STRUCT((SELECT SUM(count) FROM UNNEST(month.crossref_events)) as count) as crossref_events
FROM `{{ book_product_table_id }}`, UNNEST(months) AS month, UNNEST(onix.authors)
FROM `{{ book_product_table_id }}`, UNNEST(months) AS month
WHERE
{% for dp in data_partners | selectattr("export_book_metrics", "equalto", True) %}
{% include dp.files.month_null_sql %} OR
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ WITH publisher_names as (
authors as (
SELECT
ISBN13 as product_id,
STRUCT(
PersonName as person_name,
PersonNameInverted as person_name_inverted,
ORCID
) as authors
FROM `{{ book_product_table_id }}`, UNNEST(onix.authors)
ARRAY(
SELECT STRUCT(
a.PersonName as person_name,
a.PersonNameInverted as person_name_inverted,
a.ORCID as ORCID
)
FROM UNNEST(onix.authors) as a) as author
FROM `{{ book_product_table_id }}`
),

body as (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ month_country as (
(SELECT p.publisher_name as publisher_name FROM UNNEST(onix.publisher) as p WHERE p.publishing_role = "Publisher" LIMIT 1) as publisher_name, -- pull the publisher name from the onix.publisher field
CAST(onix.published_year as INT64) as published_year,
onix.published_date as published_date,
STRUCT(
PersonName as person_name,
PersonNameInverted as person_name_inverted,
ORCID as ORCID
) as authors,
ARRAY(
SELECT STRUCT(
a.PersonName as person_name,
a.PersonNameInverted as person_name_inverted,
a.ORCID as ORCID
)
FROM UNNEST(onix.authors) as a) as author,
STRUCT(
onix.bic_subjects as bic,
onix.bisac_subjects as bisac,
Expand All @@ -50,7 +52,7 @@ month_country as (
iso_name as country_iso_name,
wikipedia_name as country_wikipedia_name,
jstor_name as country_jstor_name
FROM months, UNNEST(onix.authors)
FROM months
CROSS JOIN countries
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ WITH publisher_names as (
authors as (
SELECT
ISBN13 as product_id,
STRUCT(
PersonName as person_name,
PersonNameInverted as person_name_inverted,
ORCID
) as authors
FROM `{{ book_product_table_id }}`, UNNEST(onix.authors)
ARRAY(
SELECT STRUCT(
a.PersonName as person_name,
a.PersonNameInverted as person_name_inverted,
a.ORCID as ORCID
)
FROM UNNEST(onix.authors) as a) as author
FROM `{{ book_product_table_id }}`
),

body as (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ SELECT
(SELECT p.publisher_name as publisher_name FROM UNNEST(onix.publisher) as p WHERE p.publishing_role = "Publisher" LIMIT 1) as publisher_name, -- pull the publisher name from the onix.publisher field
CAST(onix.published_year as INT64) as published_year,
onix.published_date as published_date,
STRUCT(
PersonName as person_name,
PersonNameInverted as person_name_inverted,
ORCID as ORCID
) as authors,
ARRAY(
SELECT STRUCT(
a.PersonName as person_name,
a.PersonNameInverted as person_name_inverted,
a.ORCID as ORCID
)
FROM UNNEST(onix.authors) as a) as author,
STRUCT(
onix.bic_subjects as bic,
onix.bisac_subjects as bisac,
Expand All @@ -39,5 +41,5 @@ SELECT
STRUCT(
institution.Total_Item_Requests
) as jstor,
FROM `{{ book_product_table_id }}`, UNNEST(months) as month, UNNEST(month.jstor_institution) as institution, UNNEST(onix.authors)
FROM `{{ book_product_table_id }}`, UNNEST(months) as month, UNNEST(month.jstor_institution) as institution
WHERE ARRAY_LENGTH(month.jstor_institution) > 0
2 changes: 1 addition & 1 deletion tests/onix_workflow/test_onix_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,7 @@ def vcr_ignore_condition(request):
export_tables = [
("book_list", 4),
("book_institution_list", 1),
("book_metrics", 6), # 5
("book_metrics", 6),
("book_metrics_country", 32),
("book_metrics_institution", 1),
("book_metrics_author", 3),
Expand Down

0 comments on commit baf2ef4

Please sign in to comment.