From 2b240f55cad22d51bfcddda5144b4e76bbaa5596 Mon Sep 17 00:00:00 2001 From: Keegan Smith Date: Tue, 8 Aug 2023 06:33:53 +0800 Subject: [PATCH] Fixed irus names (#168) --- docs/oaebu_workflows/telescopes/irus_oapen.md | 8 ++--- oaebu_workflows/api_type_ids.py | 4 +-- .../oaebu-author-metrics-mappings.json.jinja2 | 2 +- .../oaebu-metrics-city-mappings.json.jinja2 | 2 +- ...oaebu-metrics-country-mappings.json.jinja2 | 2 +- .../oaebu-metrics-mappings.json.jinja2 | 2 +- ...ebu-publisher-metrics-mappings.json.jinja2 | 2 +- ...u-subject-bic-metrics-mappings.json.jinja2 | 2 +- ...subject-bisac-metrics-mappings.json.jinja2 | 2 +- ...subject-thema-metrics-mappings.json.jinja2 | 2 +- ...-subject-year-metrics-mappings.json.jinja2 | 2 +- .../oaebu-year-metrics-mappings.json.jinja2 | 2 +- .../workflows/irus_fulcrum_telescope.py | 10 +++---- .../workflows/irus_oapen_telescope.py | 29 ++++++++----------- .../tests/test_irus_fulcrum_telescope.py | 2 +- .../tests/test_irus_oapen_telescope.py | 4 +-- 16 files changed, 36 insertions(+), 41 deletions(-) diff --git a/docs/oaebu_workflows/telescopes/irus_oapen.md b/docs/oaebu_workflows/telescopes/irus_oapen.md index 341d954d..a667c281 100644 --- a/docs/oaebu_workflows/telescopes/irus_oapen.md +++ b/docs/oaebu_workflows/telescopes/irus_oapen.md @@ -156,10 +156,10 @@ In the 'permissions' tab, click 'Grant Access', add the airflow service account Note that all values need to be urlencoded. In the config.yaml file, the following airflow connections are required: -### oapen_irus_uk_login +### irus_oapen_login To get the email address/password combination, contact IRUS. -### oapen_irus_uk_api +### irus_oapen_api To get the requestor_id/api_key, contact IRUS. ### geoip_license_key @@ -170,8 +170,8 @@ Then, click on 'Generate new license key', this can be used for the 'license_key Answer \_No* for the question: "Old versions of our GeoIP Update program use a different license key format. Will this key be used for GeoIP Update?" ```yaml -oapen_irus_uk_login: mysql://email_address:password@ -oapen_irus_uk_api: mysql://requestor_id:api_key@ +irus_oapen_login: mysql://email_address:password@ +irus_oapen_api: mysql://requestor_id:api_key@ geoip_license_key: mysql://user_id:license_key@ ``` diff --git a/oaebu_workflows/api_type_ids.py b/oaebu_workflows/api_type_ids.py index 184a41d7..7d95b130 100644 --- a/oaebu_workflows/api_type_ids.py +++ b/oaebu_workflows/api_type_ids.py @@ -36,7 +36,7 @@ class DatasetTypeId: google_books_traffic = "google_books_traffic" jstor_country = "jstor_country" jstor_institution = "jstor_institution" - oapen_irus_uk = "oapen_irus_uk" + irus_oapen = "irus_oapen" ucl_discovery = "ucl_discovery" fulcrum = "fulcrum" @@ -55,7 +55,7 @@ class WorkflowTypeId: google_analytics = "google_analytics" google_books = "google_books" jstor = "jstor" - oapen_irus_uk = "oapen_irus_uk" + irus_oapen = "irus_oapen" ucl_discovery = "ucl_discovery" fulcrum = "fulcrum" diff --git a/oaebu_workflows/database/mappings/oaebu-author-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-author-metrics-mappings.json.jinja2 index 6c837b75..719c808f 100644 --- a/oaebu_workflows/database/mappings/oaebu-author-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-author-metrics-mappings.json.jinja2 @@ -100,7 +100,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-metrics-city-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-metrics-city-mappings.json.jinja2 index 8f38f913..4d669fc4 100644 --- a/oaebu_workflows/database/mappings/oaebu-metrics-city-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-metrics-city-mappings.json.jinja2 @@ -62,7 +62,7 @@ "coordinates": { "type": "geo_point" }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-metrics-country-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-metrics-country-mappings.json.jinja2 index 2fdf7cc9..ecee73b8 100644 --- a/oaebu_workflows/database/mappings/oaebu-metrics-country-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-metrics-country-mappings.json.jinja2 @@ -75,7 +75,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-metrics-mappings.json.jinja2 index 5ffd0f25..966f5e65 100644 --- a/oaebu_workflows/database/mappings/oaebu-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-metrics-mappings.json.jinja2 @@ -143,7 +143,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-publisher-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-publisher-metrics-mappings.json.jinja2 index 0aa9d216..4d15f145 100644 --- a/oaebu_workflows/database/mappings/oaebu-publisher-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-publisher-metrics-mappings.json.jinja2 @@ -79,7 +79,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-subject-bic-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-subject-bic-metrics-mappings.json.jinja2 index cd889799..372d19ca 100644 --- a/oaebu_workflows/database/mappings/oaebu-subject-bic-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-subject-bic-metrics-mappings.json.jinja2 @@ -97,7 +97,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-subject-bisac-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-subject-bisac-metrics-mappings.json.jinja2 index cd889799..372d19ca 100644 --- a/oaebu_workflows/database/mappings/oaebu-subject-bisac-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-subject-bisac-metrics-mappings.json.jinja2 @@ -97,7 +97,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-subject-thema-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-subject-thema-metrics-mappings.json.jinja2 index cd889799..372d19ca 100644 --- a/oaebu_workflows/database/mappings/oaebu-subject-thema-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-subject-thema-metrics-mappings.json.jinja2 @@ -97,7 +97,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-subject-year-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-subject-year-metrics-mappings.json.jinja2 index 48c08bf0..b16a7aea 100644 --- a/oaebu_workflows/database/mappings/oaebu-subject-year-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-subject-year-metrics-mappings.json.jinja2 @@ -97,7 +97,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/database/mappings/oaebu-year-metrics-mappings.json.jinja2 b/oaebu_workflows/database/mappings/oaebu-year-metrics-mappings.json.jinja2 index 8ee7cc0a..28f84c4c 100644 --- a/oaebu_workflows/database/mappings/oaebu-year-metrics-mappings.json.jinja2 +++ b/oaebu_workflows/database/mappings/oaebu-year-metrics-mappings.json.jinja2 @@ -88,7 +88,7 @@ } } }, - "oapen_irus_uk": { + "irus_oapen": { "properties": { "title_requests": { "type": "integer" diff --git a/oaebu_workflows/workflows/irus_fulcrum_telescope.py b/oaebu_workflows/workflows/irus_fulcrum_telescope.py index 41169de7..0e7f21f4 100644 --- a/oaebu_workflows/workflows/irus_fulcrum_telescope.py +++ b/oaebu_workflows/workflows/irus_fulcrum_telescope.py @@ -84,7 +84,7 @@ def __init__( api_dataset_id: str = "fulcrum", schema_folder: str = default_schema_folder(), observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API, - oapen_irus_api_conn_id: str = "oapen_irus_uk_api", + irus_oapen_api_conn_id: str = "irus_api", catchup: bool = True, schedule_interval: str = "0 0 4 * *", # Run on the 4th of every month start_date: pendulum.DateTime = pendulum.datetime(2022, 4, 1), # Earliest available data @@ -100,7 +100,7 @@ def __init__( :param api_dataset_id: The ID to store the dataset release in the API :param schema_folder: The path to the SQL schema folder :param observatory_api_conn_id: Airflow connection ID for the overvatory API - :param oapen_irus_api_conn_id: Airflow connection ID OAPEN IRUS UK (counter 5) + :param irus_oapen_api_conn_id: Airflow connection ID OAPEN IRUS UK (counter 5) :param catchup: Whether to catchup the DAG or not :param schedule_interval: The schedule interval of the DAG :param start_date: The start date of the DAG @@ -112,7 +112,7 @@ def __init__( dag_id, start_date, schedule_interval, - airflow_conns=[observatory_api_conn_id, oapen_irus_api_conn_id], + airflow_conns=[observatory_api_conn_id, irus_oapen_api_conn_id], catchup=catchup, tags=["oaebu"], ) @@ -127,7 +127,7 @@ def __init__( self.api_dataset_id = api_dataset_id self.schema_folder = schema_folder self.observatory_api_conn_id = observatory_api_conn_id - self.oapen_irus_api_conn_id = oapen_irus_api_conn_id + self.irus_oapen_api_conn_id = irus_oapen_api_conn_id check_workflow_inputs(self) @@ -164,7 +164,7 @@ def download(self, release: IrusFulcrumRelease, **kwargs): :param releases: the IrusFulcrumRelease instance. """ - requestor_id = BaseHook.get_connection(self.oapen_irus_api_conn_id).login + requestor_id = BaseHook.get_connection(self.irus_oapen_api_conn_id).login totals_data, country_data = download_fulcrum_month_data(release.partition_date, requestor_id) assert totals_data and country_data, f"Data not available for supplied release month: {release.partition_date}" save_jsonl_gz(release.download_totals_path, totals_data) diff --git a/oaebu_workflows/workflows/irus_oapen_telescope.py b/oaebu_workflows/workflows/irus_oapen_telescope.py index 87ab9cb9..067617bc 100644 --- a/oaebu_workflows/workflows/irus_oapen_telescope.py +++ b/oaebu_workflows/workflows/irus_oapen_telescope.py @@ -114,11 +114,11 @@ def __init__( max_cloud_function_instances: int = 0, observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API, geoip_license_conn_id: str = "geoip_license_key", - oapen_irus_api_conn_id: str = "irus_api", - oapen_irus_login_conn_id: str = "irus_login", + irus_oapen_api_conn_id: str = "irus_api", + irus_oapen_login_conn_id: str = "irus_login", catchup: bool = True, start_date: pendulum.DateTime = pendulum.datetime(2015, 6, 1), - schedule_interval: str = "0 0 4 * *", # Run on the 4th of every month + schedule_interval: str = "0 0 4 * *", # Run on the 4th of every month max_active_runs: int = 5, ): """The OAPEN irus uk telescope. @@ -135,20 +135,15 @@ def __init__( :param max_cloud_function_instances: :param observatory_api_conn_id: Airflow connection ID for the overvatory API :param geoip_license_conn_id: The Airflow connection ID for the GEOIP license - :param oapen_irus_api_conn_id: The Airflow connection ID for IRUS API - for counter 5 - :param oapen_irus_login_conn_id: The Airflow connection ID for IRUS API (login) - for counter 4 + :param irus_oapen_api_conn_id: The Airflow connection ID for IRUS API - for counter 5 + :param irus_oapen_login_conn_id: The Airflow connection ID for IRUS API (login) - for counter 4 :param catchup: Whether to catchup the DAG or not :param start_date: The start date of the DAG :param schedule_interval: The schedule interval of the DAG :param max_active_runs: The maximum number of concurrent DAG instances """ if bq_table_description is None: - bq_table_description = { - "irus_oapen": "Metrics from IRUS OAPEN. Before 2020-04 " - "from: https://irus.jisc.ac.uk/IRUSConsult/irus-oapen/v2/. " - "After 2020-04 from the OAPEN_SUSHI API (documentation not " - "published)." - } + bq_table_description = "OAPEN metrics as recorded by the IRUS platform" super().__init__( dag_id, @@ -158,8 +153,8 @@ def __init__( airflow_conns=[ observatory_api_conn_id, geoip_license_conn_id, - oapen_irus_api_conn_id, - oapen_irus_login_conn_id, + irus_oapen_api_conn_id, + irus_oapen_login_conn_id, ], max_active_runs=max_active_runs, tags=["oaebu"], @@ -177,8 +172,8 @@ def __init__( self.max_cloud_function_instances = max_cloud_function_instances self.observatory_api_conn_id = observatory_api_conn_id self.geoip_license_conn_id = geoip_license_conn_id - self.oapen_irus_api_conn_id = oapen_irus_api_conn_id - self.oapen_irus_login_conn_id = oapen_irus_login_conn_id + self.irus_oapen_api_conn_id = irus_oapen_api_conn_id + self.irus_oapen_login_conn_id = irus_oapen_login_conn_id check_workflow_inputs(self) @@ -317,9 +312,9 @@ def call_cloud_function(self, releases: List[IrusOapenRelease], **kwargs): # get the publisher_uuid or publisher_id, both are set to empty strings when publisher id is 'oapen' if release.partition_date >= pendulum.datetime(2020, 4, 1): - airflow_conn = self.oapen_irus_api_conn_id + airflow_conn = self.irus_oapen_api_conn_id else: - airflow_conn = self.oapen_irus_login_conn_id + airflow_conn = self.irus_oapen_login_conn_id username = BaseHook.get_connection(airflow_conn).login password = BaseHook.get_connection(airflow_conn).password diff --git a/oaebu_workflows/workflows/tests/test_irus_fulcrum_telescope.py b/oaebu_workflows/workflows/tests/test_irus_fulcrum_telescope.py index 67b3b989..71ee513c 100644 --- a/oaebu_workflows/workflows/tests/test_irus_fulcrum_telescope.py +++ b/oaebu_workflows/workflows/tests/test_irus_fulcrum_telescope.py @@ -115,7 +115,7 @@ def test_telescope(self): bq_dataset_id=env.add_dataset(), ) dag = telescope.make_dag() - env.add_connection(Connection(conn_id=telescope.oapen_irus_api_conn_id, uri=f"http://fake_api_login:@")) + env.add_connection(Connection(conn_id=telescope.irus_oapen_api_conn_id, uri=f"http://fake_api_login:@")) # Add the fake requestor ID as a connection with env.create_dag_run(dag, execution_date): diff --git a/oaebu_workflows/workflows/tests/test_irus_oapen_telescope.py b/oaebu_workflows/workflows/tests/test_irus_oapen_telescope.py index a0d20509..f1f10e00 100644 --- a/oaebu_workflows/workflows/tests/test_irus_oapen_telescope.py +++ b/oaebu_workflows/workflows/tests/test_irus_oapen_telescope.py @@ -182,9 +182,9 @@ def test_telescope(self, mock_authorized_session, mock_account_credentials, mock # Add airflow connections conn = Connection(conn_id=telescope.geoip_license_conn_id, uri="http://email_address:password@") env.add_connection(conn) - conn = Connection(conn_id=telescope.oapen_irus_api_conn_id, uri="mysql://requestor_id:api_key@") + conn = Connection(conn_id=telescope.irus_oapen_api_conn_id, uri="mysql://requestor_id:api_key@") env.add_connection(conn) - conn = Connection(conn_id=telescope.oapen_irus_login_conn_id, uri="mysql://user_id:license_key@") + conn = Connection(conn_id=telescope.irus_oapen_login_conn_id, uri="mysql://user_id:license_key@") env.add_connection(conn) # Test that all dependencies are specified: no error should be thrown