Skip to content

Commit

Permalink
Fixed irus names (#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
keegansmith21 authored Aug 7, 2023
1 parent 7e7aae2 commit 2b240f5
Show file tree
Hide file tree
Showing 16 changed files with 36 additions and 41 deletions.
8 changes: 4 additions & 4 deletions docs/oaebu_workflows/telescopes/irus_oapen.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,10 @@ In the 'permissions' tab, click 'Grant Access', add the airflow service account
Note that all values need to be urlencoded.
In the config.yaml file, the following airflow connections are required:

### oapen_irus_uk_login
### irus_oapen_login
To get the email address/password combination, contact IRUS.

### oapen_irus_uk_api
### irus_oapen_api
To get the requestor_id/api_key, contact IRUS.

### geoip_license_key
Expand All @@ -170,8 +170,8 @@ Then, click on 'Generate new license key', this can be used for the 'license_key
Answer *No* for the question: "Old versions of our GeoIP Update program use a different license key format. Will this key be used for GeoIP Update?"

```yaml
oapen_irus_uk_login: mysql://email_address:password@
oapen_irus_uk_api: mysql://requestor_id:api_key@
irus_oapen_login: mysql://email_address:password@
irus_oapen_api: mysql://requestor_id:api_key@
geoip_license_key: mysql://user_id:license_key@
```
Expand Down
4 changes: 2 additions & 2 deletions oaebu_workflows/api_type_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class DatasetTypeId:
google_books_traffic = "google_books_traffic"
jstor_country = "jstor_country"
jstor_institution = "jstor_institution"
oapen_irus_uk = "oapen_irus_uk"
irus_oapen = "irus_oapen"
ucl_discovery = "ucl_discovery"
fulcrum = "fulcrum"

Expand All @@ -55,7 +55,7 @@ class WorkflowTypeId:
google_analytics = "google_analytics"
google_books = "google_books"
jstor = "jstor"
oapen_irus_uk = "oapen_irus_uk"
irus_oapen = "irus_oapen"
ucl_discovery = "ucl_discovery"
fulcrum = "fulcrum"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
"coordinates": {
"type": "geo_point"
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
}
}
},
"oapen_irus_uk": {
"irus_oapen": {
"properties": {
"title_requests": {
"type": "integer"
Expand Down
10 changes: 5 additions & 5 deletions oaebu_workflows/workflows/irus_fulcrum_telescope.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(
api_dataset_id: str = "fulcrum",
schema_folder: str = default_schema_folder(),
observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API,
oapen_irus_api_conn_id: str = "oapen_irus_uk_api",
irus_oapen_api_conn_id: str = "irus_api",
catchup: bool = True,
schedule_interval: str = "0 0 4 * *", # Run on the 4th of every month
start_date: pendulum.DateTime = pendulum.datetime(2022, 4, 1), # Earliest available data
Expand All @@ -100,7 +100,7 @@ def __init__(
:param api_dataset_id: The ID to store the dataset release in the API
:param schema_folder: The path to the SQL schema folder
:param observatory_api_conn_id: Airflow connection ID for the observatory API
:param oapen_irus_api_conn_id: Airflow connection ID OAPEN IRUS UK (counter 5)
:param irus_oapen_api_conn_id: Airflow connection ID for OAPEN IRUS UK (counter 5)
:param catchup: Whether to catchup the DAG or not
:param schedule_interval: The schedule interval of the DAG
:param start_date: The start date of the DAG
Expand All @@ -112,7 +112,7 @@ def __init__(
dag_id,
start_date,
schedule_interval,
airflow_conns=[observatory_api_conn_id, oapen_irus_api_conn_id],
airflow_conns=[observatory_api_conn_id, irus_oapen_api_conn_id],
catchup=catchup,
tags=["oaebu"],
)
Expand All @@ -127,7 +127,7 @@ def __init__(
self.api_dataset_id = api_dataset_id
self.schema_folder = schema_folder
self.observatory_api_conn_id = observatory_api_conn_id
self.oapen_irus_api_conn_id = oapen_irus_api_conn_id
self.irus_oapen_api_conn_id = irus_oapen_api_conn_id

check_workflow_inputs(self)

Expand Down Expand Up @@ -164,7 +164,7 @@ def download(self, release: IrusFulcrumRelease, **kwargs):
:param release: the IrusFulcrumRelease instance.
"""
requestor_id = BaseHook.get_connection(self.oapen_irus_api_conn_id).login
requestor_id = BaseHook.get_connection(self.irus_oapen_api_conn_id).login
totals_data, country_data = download_fulcrum_month_data(release.partition_date, requestor_id)
assert totals_data and country_data, f"Data not available for supplied release month: {release.partition_date}"
save_jsonl_gz(release.download_totals_path, totals_data)
Expand Down
29 changes: 12 additions & 17 deletions oaebu_workflows/workflows/irus_oapen_telescope.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,11 @@ def __init__(
max_cloud_function_instances: int = 0,
observatory_api_conn_id: str = AirflowConns.OBSERVATORY_API,
geoip_license_conn_id: str = "geoip_license_key",
oapen_irus_api_conn_id: str = "irus_api",
oapen_irus_login_conn_id: str = "irus_login",
irus_oapen_api_conn_id: str = "irus_api",
irus_oapen_login_conn_id: str = "irus_login",
catchup: bool = True,
start_date: pendulum.DateTime = pendulum.datetime(2015, 6, 1),
schedule_interval: str = "0 0 4 * *", # Run on the 4th of every month
schedule_interval: str = "0 0 4 * *", # Run on the 4th of every month
max_active_runs: int = 5,
):
"""The OAPEN irus uk telescope.
Expand All @@ -135,20 +135,15 @@ def __init__(
:param max_cloud_function_instances:
:param observatory_api_conn_id: Airflow connection ID for the observatory API
:param geoip_license_conn_id: The Airflow connection ID for the GEOIP license
:param oapen_irus_api_conn_id: The Airflow connection ID for IRUS API - for counter 5
:param oapen_irus_login_conn_id: The Airflow connection ID for IRUS API (login) - for counter 4
:param irus_oapen_api_conn_id: The Airflow connection ID for IRUS API - for counter 5
:param irus_oapen_login_conn_id: The Airflow connection ID for IRUS API (login) - for counter 4
:param catchup: Whether to catchup the DAG or not
:param start_date: The start date of the DAG
:param schedule_interval: The schedule interval of the DAG
:param max_active_runs: The maximum number of concurrent DAG instances
"""
if bq_table_description is None:
bq_table_description = {
"irus_oapen": "Metrics from IRUS OAPEN. Before 2020-04 "
"from: https://irus.jisc.ac.uk/IRUSConsult/irus-oapen/v2/. "
"After 2020-04 from the OAPEN_SUSHI API (documentation not "
"published)."
}
bq_table_description = "OAPEN metrics as recorded by the IRUS platform"

super().__init__(
dag_id,
Expand All @@ -158,8 +153,8 @@ def __init__(
airflow_conns=[
observatory_api_conn_id,
geoip_license_conn_id,
oapen_irus_api_conn_id,
oapen_irus_login_conn_id,
irus_oapen_api_conn_id,
irus_oapen_login_conn_id,
],
max_active_runs=max_active_runs,
tags=["oaebu"],
Expand All @@ -177,8 +172,8 @@ def __init__(
self.max_cloud_function_instances = max_cloud_function_instances
self.observatory_api_conn_id = observatory_api_conn_id
self.geoip_license_conn_id = geoip_license_conn_id
self.oapen_irus_api_conn_id = oapen_irus_api_conn_id
self.oapen_irus_login_conn_id = oapen_irus_login_conn_id
self.irus_oapen_api_conn_id = irus_oapen_api_conn_id
self.irus_oapen_login_conn_id = irus_oapen_login_conn_id

check_workflow_inputs(self)

Expand Down Expand Up @@ -317,9 +312,9 @@ def call_cloud_function(self, releases: List[IrusOapenRelease], **kwargs):

# get the publisher_uuid or publisher_id, both are set to empty strings when publisher id is 'oapen'
if release.partition_date >= pendulum.datetime(2020, 4, 1):
airflow_conn = self.oapen_irus_api_conn_id
airflow_conn = self.irus_oapen_api_conn_id
else:
airflow_conn = self.oapen_irus_login_conn_id
airflow_conn = self.irus_oapen_login_conn_id
username = BaseHook.get_connection(airflow_conn).login
password = BaseHook.get_connection(airflow_conn).password

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_telescope(self):
bq_dataset_id=env.add_dataset(),
)
dag = telescope.make_dag()
env.add_connection(Connection(conn_id=telescope.oapen_irus_api_conn_id, uri=f"http://fake_api_login:@"))
env.add_connection(Connection(conn_id=telescope.irus_oapen_api_conn_id, uri=f"http://fake_api_login:@"))

# Add the fake requestor ID as a connection
with env.create_dag_run(dag, execution_date):
Expand Down
4 changes: 2 additions & 2 deletions oaebu_workflows/workflows/tests/test_irus_oapen_telescope.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ def test_telescope(self, mock_authorized_session, mock_account_credentials, mock
# Add airflow connections
conn = Connection(conn_id=telescope.geoip_license_conn_id, uri="http://email_address:password@")
env.add_connection(conn)
conn = Connection(conn_id=telescope.oapen_irus_api_conn_id, uri="mysql://requestor_id:api_key@")
conn = Connection(conn_id=telescope.irus_oapen_api_conn_id, uri="mysql://requestor_id:api_key@")
env.add_connection(conn)
conn = Connection(conn_id=telescope.oapen_irus_login_conn_id, uri="mysql://user_id:license_key@")
conn = Connection(conn_id=telescope.irus_oapen_login_conn_id, uri="mysql://user_id:license_key@")
env.add_connection(conn)

# Test that all dependencies are specified: no error should be thrown
Expand Down

0 comments on commit 2b240f5

Please sign in to comment.