diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f3b13f7..193484e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,3 +10,8 @@ repos: # pre-commit's default_language_version, see # https://pre-commit.com/#top_level-default_language_version language_version: python3.9 + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile", "black", "--filter-files"] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a852e18..2645631 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,13 @@ test = [ dev = [ "bandit", "black==22.12.0", + "isort", "pre-commit", "pylint", "pycodestyle" ] + +[tool.isort] +profile = "black" +src_paths = ["src", "tests"] +skip_glob = [".aws_sam"] \ No newline at end of file diff --git a/scripts/credential_management.py b/scripts/credential_management.py index 6daba3a..5b3b6f6 100755 --- a/scripts/credential_management.py +++ b/scripts/credential_management.py @@ -6,7 +6,6 @@ import sys import boto3 - from requests.auth import _basic_auth_str diff --git a/scripts/cumulus_upload_data.py b/scripts/cumulus_upload_data.py index 090bba0..6975c80 100755 --- a/scripts/cumulus_upload_data.py +++ b/scripts/cumulus_upload_data.py @@ -4,7 +4,6 @@ import argparse import os import sys - from pathlib import Path import boto3 diff --git a/scripts/migrate_versioning.py b/scripts/migrate_versioning.py new file mode 100644 index 0000000..3b555d6 --- /dev/null +++ b/scripts/migrate_versioning.py @@ -0,0 +1,90 @@ +""" Utility for adding versioning to an existing aggregator data store + +This is a one time thing for us, so the CLI/Boto creds are not robust. +""" +import argparse +import boto3 +import io +import json + +from rich import progress + +UPLOAD_ROOT_BUCKETS = ["archive", "error", "last_valid", "latest", "site_upload"] + + +def _get_s3_data(key: str, bucket_name: str, client) -> dict: + """Convenience class for retrieving a dict from S3""" + try: + bytes_buffer = io.BytesIO() + client.download_fileobj(Bucket=bucket_name, Key=key, Fileobj=bytes_buffer) + return json.loads(bytes_buffer.getvalue().decode()) + except Exception: # pylint: disable=broad-except + return {} + + +def _put_s3_data(key: str, bucket_name: str, client, data: dict) -> None: + """Convenience class for writing a dict to S3""" + b_data = io.BytesIO(json.dumps(data).encode()) + client.upload_fileobj(Bucket=bucket_name, Key=key, Fileobj=b_data) + + +def _get_depth(d): + if isinstance(d, dict): + return 1 + (max(map(_get_depth, d.values())) if d else 0) + return 0 + + +def migrate_bucket_versioning(bucket: str): + client = boto3.client("s3") + res = client.list_objects_v2(Bucket=bucket) + contents = res["Contents"] + moved_files = 0 + for s3_file in contents: + if s3_file["Key"].split("/")[0] in UPLOAD_ROOT_BUCKETS: + key = s3_file["Key"] + key_array = key.split("/") + if len(key_array) == 5: + key_array.insert(4, "000") + new_key = "/".join(key_array) + client.copy({"Bucket": bucket, "Key": key}, bucket, new_key) + client.delete_object(Bucket=bucket, Key=key) + moved_files += 1 + print(f"Moved {moved_files} uploads") + + study_periods = _get_s3_data("metadata/study_periods.json", bucket, client) + + if _get_depth(study_periods) == 3: + new_sp = {} + for site in study_periods: + new_sp[site] = {} + for study in study_periods[site]: + new_sp[site][study] = {} + new_sp[site][study]["000"] = study_periods[site][study] + _put_s3_data("metadata/study_periods.json", bucket, client, new_sp) + 
print("study_periods.json updated") + else: + print("study_periods.json does not need update") + + transactions = _get_s3_data("metadata/transactions.json", bucket, client) + if _get_depth(transactions) == 4: + new_t = {} + for site in transactions: + new_t[site] = {} + for study in transactions[site]: + new_t[site][study] = {} + for dp in transactions[site][study]: + new_t[site][study][dp] = {} + new_t[site][study][dp]["000"] = transactions[site][study][dp] + _put_s3_data("metadata/transactions.json", bucket, client, new_t) + print("transactions.json updated") + else: + print("transactions.json does not need update") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="""Util for migrating aggregator data""" + ) + parser.add_argument("-b", "--bucket", help="bucket name") + args = parser.parse_args() + migrate_bucket_versioning(args.bucket) diff --git a/src/handlers/dashboard/get_chart_data.py b/src/handlers/dashboard/get_chart_data.py index 370a86e..3150965 100644 --- a/src/handlers/dashboard/get_chart_data.py +++ b/src/handlers/dashboard/get_chart_data.py @@ -2,8 +2,7 @@ This is intended to provide an implementation of the logic described in docs/api.md """ import os - -from typing import List, Dict +from typing import Dict, List import awswrangler import boto3 diff --git a/src/handlers/shared/functions.py b/src/handlers/shared/functions.py index 45774fe..8cd2462 100644 --- a/src/handlers/shared/functions.py +++ b/src/handlers/shared/functions.py @@ -1,10 +1,9 @@ """ Functions used across different lambdas""" import io -import logging import json - -from typing import Dict, Optional +import logging from datetime import datetime, timezone +from typing import Dict, Optional import boto3 @@ -75,6 +74,7 @@ def update_metadata( site: str, study: str, data_package: str, + version: str, target: str, dt: Optional[datetime] = None, meta_type: str = JsonFilename.TRANSACTIONS.value, @@ -84,18 +84,20 @@ def update_metadata( if meta_type == JsonFilename.TRANSACTIONS.value: site_metadata = metadata.setdefault(site, {}) study_metadata = site_metadata.setdefault(study, {}) - data_package_metadata = study_metadata.setdefault( - data_package, TRANSACTION_METADATA_TEMPLATE + data_package_metadata = study_metadata.setdefault(data_package, {}) + version_metadata = data_package_metadata.setdefault( + version, TRANSACTION_METADATA_TEMPLATE ) dt = dt or datetime.now(timezone.utc) - data_package_metadata[target] = dt.isoformat() + version_metadata[target] = dt.isoformat() elif meta_type == JsonFilename.STUDY_PERIODS.value: site_metadata = metadata.setdefault(site, {}) - study_period_metadata = site_metadata.setdefault( - study, STUDY_PERIOD_METADATA_TEMPLATE + study_period_metadata = site_metadata.setdefault(study, {}) + version_metadata = study_period_metadata.setdefault( + version, STUDY_PERIOD_METADATA_TEMPLATE ) dt = dt or datetime.now(timezone.utc) - study_period_metadata[target] = dt.isoformat() + version_metadata[target] = dt.isoformat() return metadata diff --git a/src/handlers/site_upload/fetch_upload_url.py b/src/handlers/site_upload/fetch_upload_url.py index 41d5fc9..f1ef8e8 100644 --- a/src/handlers/site_upload/fetch_upload_url.py +++ b/src/handlers/site_upload/fetch_upload_url.py @@ -46,6 +46,6 @@ def upload_url_handler(event, context): res = create_presigned_post( os.environ.get("BUCKET_NAME"), f"{BucketPath.UPLOAD.value}/{body['study']}/{body['data_package']}/" - f"{metadata_db[user]['path']}/{body['filename']}", + 
f"{body['version']}/{metadata_db[user]['path']}/{body['filename']}", ) return res diff --git a/src/handlers/site_upload/powerset_merge.py b/src/handlers/site_upload/powerset_merge.py index 7ac8e97..d1c768a 100644 --- a/src/handlers/site_upload/powerset_merge.py +++ b/src/handlers/site_upload/powerset_merge.py @@ -3,19 +3,17 @@ import logging import os import traceback - from datetime import datetime, timezone import awswrangler import boto3 import pandas - from numpy import nan from pandas.core.indexes.range import RangeIndex +from src.handlers.shared.awswrangler_functions import get_s3_data_package_list from src.handlers.shared.decorators import generic_error_handler from src.handlers.shared.enums import BucketPath -from src.handlers.shared.awswrangler_functions import get_s3_data_package_list from src.handlers.shared.functions import ( get_s3_site_filename_suffix, http_response, @@ -47,7 +45,7 @@ def __init__(self, event): self.site = s3_key_array[3] self.study = s3_key_array[1] self.data_package = s3_key_array[2] - + self.version = s3_key_array[4] self.metadata = read_metadata(self.s3_client, self.s3_bucket_name) # S3 Filesystem operations @@ -78,7 +76,7 @@ def write_parquet(self, df: pandas.DataFrame, is_new_data_package: bool) -> None """writes dataframe as parquet to s3 and sends an SNS notification if new""" parquet_aggregate_path = ( f"s3://{self.s3_bucket_name}/{BucketPath.AGGREGATE.value}/" - f"{self.study}/{self.study}__{self.data_package}/" + f"{self.study}/{self.study}__{self.data_package}/{self.version}/" f"{self.study}__{self.data_package}__aggregate.parquet" ) awswrangler.s3.to_parquet(df, parquet_aggregate_path, index=False) @@ -92,7 +90,7 @@ def write_csv(self, df: pandas.DataFrame) -> None: """writes dataframe as csv to s3""" csv_aggregate_path = ( f"s3://{self.s3_bucket_name}/{BucketPath.CSVAGGREGATE.value}/" - f"{self.study}/{self.study}__{self.data_package}/" + f"{self.study}/{self.study}__{self.data_package}/{self.version}/" f"{self.study}__{self.data_package}__aggregate.csv" ) df = df.apply(lambda x: x.strip() if isinstance(x, str) else x).replace( @@ -109,7 +107,7 @@ def update_local_metadata(self, key, site=None): if site is None: site = self.site self.metadata = update_metadata( - self.metadata, site, self.study, self.data_package, key + self.metadata, site, self.study, self.data_package, self.version, key ) def write_local_metadata(self): @@ -162,9 +160,7 @@ def expand_and_concat_sets( site_df["site"] = get_static_string_series(None, site_df.index) df_copy["site"] = get_static_string_series(site_name, df_copy.index) - # TODO: we should introduce some kind of data versioning check to see if datasets - # are generated from the same vintage. This naive approach will cause a decent - # amount of data churn we'll have to manage in the interim. + # Did we change the schema without updating the version? 
if df.empty is False and set(site_df.columns) != set(df.columns): raise MergeError( "Uploaded data has a different schema than last aggregate", diff --git a/src/handlers/site_upload/process_upload.py b/src/handlers/site_upload/process_upload.py index 76b94ce..cee2b14 100644 --- a/src/handlers/site_upload/process_upload.py +++ b/src/handlers/site_upload/process_upload.py @@ -28,8 +28,9 @@ def process_upload(s3_client, sns_client, s3_bucket_name: str, s3_key: str) -> N study = path_params[1] data_package = path_params[2] site = path_params[3] + version = path_params[4] if s3_key.endswith(".parquet"): - if "_meta_" in s3_key: + if "__meta_" in s3_key: new_key = f"{BucketPath.STUDY_META.value}/{s3_key.split('/', 1)[-1]}" topic_sns_arn = os.environ.get("TOPIC_PROCESS_STUDY_META_ARN") sns_subject = "Process study medata upload event" @@ -43,6 +44,7 @@ def process_upload(s3_client, sns_client, s3_bucket_name: str, s3_key: str) -> N site, study, data_package, + version, "last_upload", last_uploaded_date, ) @@ -56,11 +58,18 @@ def process_upload(s3_client, sns_client, s3_bucket_name: str, s3_key: str) -> N site, study, data_package, + version, "last_upload", last_uploaded_date, ) metadata = update_metadata( - metadata, site, study, data_package, "last_error", last_uploaded_date + metadata, + site, + study, + data_package, + version, + "last_error", + last_uploaded_date, ) write_metadata(s3_client, s3_bucket_name, metadata) raise UnexpectedFileTypeError diff --git a/src/handlers/site_upload/study_period.py b/src/handlers/site_upload/study_period.py index 276a12c..d4c73d7 100644 --- a/src/handlers/site_upload/study_period.py +++ b/src/handlers/site_upload/study_period.py @@ -1,15 +1,14 @@ """ Lambda for updating date ranges associated with studies """ import os - from datetime import datetime, timezone import awswrangler import boto3 +from src.handlers.shared.awswrangler_functions import get_s3_study_meta_list from src.handlers.shared.decorators import generic_error_handler from src.handlers.shared.enums import BucketPath, JsonFilename -from src.handlers.shared.awswrangler_functions import get_s3_study_meta_list from src.handlers.shared.functions import ( http_response, read_metadata, @@ -18,7 +17,7 @@ ) -def update_study_period(s3_client, s3_bucket, site, study, data_package): +def update_study_period(s3_client, s3_bucket, site, study, data_package, version): """gets earliest/latest date from study metadata files""" path = get_s3_study_meta_list( BucketPath.STUDY_META.value, s3_bucket, study, data_package, site @@ -29,11 +28,13 @@ def update_study_period(s3_client, s3_bucket, site, study, data_package): study_meta = read_metadata( s3_client, s3_bucket, meta_type=JsonFilename.STUDY_PERIODS.value ) + study_meta = update_metadata( study_meta, site, study, data_package, + version, "earliest_date", df["min_date"][0], meta_type=JsonFilename.STUDY_PERIODS.value, @@ -43,6 +44,7 @@ def update_study_period(s3_client, s3_bucket, site, study, data_package): site, study, data_package, + version, "latest_date", df["max_date"][0], meta_type=JsonFilename.STUDY_PERIODS.value, @@ -52,6 +54,7 @@ def update_study_period(s3_client, s3_bucket, site, study, data_package): site, study, data_package, + version, "last_data_update", datetime.now(timezone.utc), meta_type=JsonFilename.STUDY_PERIODS.value, @@ -68,11 +71,11 @@ def study_period_handler(event, context): s3_bucket = os.environ.get("BUCKET_NAME") s3_client = boto3.client("s3") s3_key = event["Records"][0]["Sns"]["Message"] - s3_key_array = s3_key.split("/") - site = 
s3_key_array[3] - study = s3_key_array[1] - data_package = s3_key_array[2] - - update_study_period(s3_client, s3_bucket, site, study, data_package) + path_params = s3_key.split("/") + study = path_params[1] + data_package = path_params[2] + site = path_params[3] + version = path_params[4] + update_study_period(s3_client, s3_bucket, site, study, data_package, version) res = http_response(200, "Study period update successful") return res diff --git a/tests/conftest.py b/tests/conftest.py index c453c8b..09885ae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,21 +22,19 @@ aggregator """ import os - from unittest import mock import boto3 import pytest - -from moto import mock_s3, mock_athena, mock_sns - +from moto import mock_athena, mock_s3, mock_sns from scripts.credential_management import create_auth, create_meta + from src.handlers.shared.enums import BucketPath, JsonFilename from src.handlers.shared.functions import write_metadata -from tests.utils import get_mock_metadata, get_mock_study_metadata, ITEM_COUNT, MOCK_ENV +from tests.utils import ITEM_COUNT, MOCK_ENV, get_mock_metadata, get_mock_study_metadata -def _init_mock_data(s3_client, bucket_name, study, data_package): +def _init_mock_data(s3_client, bucket_name, study, data_package, version): """Creates data in bucket for use in unit tests The following items are added: @@ -51,13 +49,13 @@ def _init_mock_data(s3_client, bucket_name, study, data_package): "./tests/test_data/count_synthea_patient_agg.parquet", bucket_name, f"{BucketPath.AGGREGATE.value}/{study}/" - f"{study}__{data_package}/{study}__{data_package}__aggregate.parquet", + f"{study}__{data_package}/{version}/{study}__{data_package}__aggregate.parquet", ) s3_client.upload_file( "./tests/test_data/count_synthea_patient_agg.csv", bucket_name, f"{BucketPath.CSVAGGREGATE.value}/{study}/" - f"{study}__{data_package}/{study}__{data_package}__aggregate.csv", + f"{study}__{data_package}/{version}/{study}__{data_package}__aggregate.csv", ) s3_client.upload_file( "./tests/test_data/data_packages_cache.json", @@ -90,8 +88,8 @@ def mock_bucket(): bucket = os.environ["BUCKET_NAME"] s3_client.create_bucket(Bucket=bucket) aggregate_params = [ - ["study", "encounter"], - ["other_study", "encounter"], + ["study", "encounter", "099"], + ["other_study", "encounter", "099"], ] for param_list in aggregate_params: _init_mock_data(s3_client, bucket, *param_list) diff --git a/tests/dashboard/test_get_chart_data.py b/tests/dashboard/test_get_chart_data.py index 643afe9..782729a 100644 --- a/tests/dashboard/test_get_chart_data.py +++ b/tests/dashboard/test_get_chart_data.py @@ -1,11 +1,10 @@ import json import os +from unittest import mock import pandas import pytest -from unittest import mock - from src.handlers.dashboard import get_chart_data from tests.utils import MOCK_ENV, TEST_BUCKET, TEST_GLUE_DB, TEST_WORKGROUP diff --git a/tests/dashboard/test_get_metadata.py b/tests/dashboard/test_get_metadata.py index 73084a9..b77b2fa 100644 --- a/tests/dashboard/test_get_metadata.py +++ b/tests/dashboard/test_get_metadata.py @@ -1,34 +1,44 @@ -import boto3 import json import os - -import pytest from datetime import datetime, timezone from unittest import mock +import boto3 +import pytest + +from src.handlers.dashboard.get_metadata import metadata_handler from src.handlers.shared.enums import BucketPath from src.handlers.shared.functions import read_metadata, write_metadata -from src.handlers.dashboard.get_metadata import metadata_handler -from tests.utils import get_mock_metadata, 
TEST_BUCKET +from tests.utils import TEST_BUCKET, get_mock_metadata @pytest.mark.parametrize( "params,status,expected", [ (None, 200, get_mock_metadata()), - ({"site": "general_hospital"}, 200, get_mock_metadata()["general_hospital"]), ( - {"site": "general_hospital", "study": "study"}, + {"site": "princeton_plainsboro_teaching_hospital"}, + 200, + get_mock_metadata()["princeton_plainsboro_teaching_hospital"], + ), + ( + {"site": "princeton_plainsboro_teaching_hospital", "study": "study"}, 200, - get_mock_metadata()["general_hospital"]["study"], + get_mock_metadata()["princeton_plainsboro_teaching_hospital"]["study"], ), ( - {"site": "general_hospital", "study": "study", "data_package": "encounter"}, + { + "site": "princeton_plainsboro_teaching_hospital", + "study": "study", + "data_package": "encounter", + }, 200, - get_mock_metadata()["general_hospital"]["study"]["encounter"], + get_mock_metadata()["princeton_plainsboro_teaching_hospital"]["study"][ + "encounter" + ], ), ({"site": "chicago_hope", "study": "study"}, 500, None), - ({"site": "general_hospital", "study": "flu"}, 500, None), + ({"site": "princeton_plainsboro_teaching_hospital", "study": "flu"}, 500, None), ], ) def test_get_metadata(mock_bucket, params, status, expected): diff --git a/tests/dashboard/test_get_study_periods.py b/tests/dashboard/test_get_study_periods.py index 7f02efb..35eab78 100644 --- a/tests/dashboard/test_get_study_periods.py +++ b/tests/dashboard/test_get_study_periods.py @@ -1,14 +1,14 @@ -import boto3 import json import os - -import pytest from datetime import datetime, timezone from unittest import mock +import boto3 +import pytest + +from src.handlers.dashboard.get_study_periods import study_periods_handler from src.handlers.shared.enums import BucketPath from src.handlers.shared.functions import read_metadata, write_metadata -from src.handlers.dashboard.get_study_periods import study_periods_handler from tests.utils import get_mock_study_metadata @@ -17,17 +17,19 @@ [ (None, 200, get_mock_study_metadata()), ( - {"site": "general_hospital"}, + {"site": "princeton_plainsboro_teaching_hospital"}, 200, - get_mock_study_metadata()["general_hospital"], + get_mock_study_metadata()["princeton_plainsboro_teaching_hospital"], ), ( - {"site": "general_hospital", "study": "study"}, + {"site": "princeton_plainsboro_teaching_hospital", "study": "study"}, 200, - get_mock_study_metadata()["general_hospital"]["study"], + get_mock_study_metadata()["princeton_plainsboro_teaching_hospital"][ + "study" + ], ), ({"site": "chicago_hope", "study": "study"}, 500, None), - ({"site": "general_hospital", "study": "flu"}, 500, None), + ({"site": "princeton_plainsboro_teaching_hospital", "study": "flu"}, 500, None), ], ) def test_get_study_periods(mock_bucket, params, status, expected): diff --git a/tests/dashboard/test_get_subscriptions.py b/tests/dashboard/test_get_subscriptions.py index 86ddc41..5b928ca 100644 --- a/tests/dashboard/test_get_subscriptions.py +++ b/tests/dashboard/test_get_subscriptions.py @@ -1,14 +1,12 @@ import os - from unittest import mock import awswrangler import pandas - from pytest_mock import MockerFixture from src.handlers.dashboard.get_data_packages import data_packages_handler -from tests.utils import get_mock_metadata, MOCK_ENV, DATA_PACKAGE_COUNT +from tests.utils import DATA_PACKAGE_COUNT, MOCK_ENV, get_mock_metadata @mock.patch.dict(os.environ, MOCK_ENV) diff --git a/tests/site_upload/test_api_gateway_authorizer.py b/tests/site_upload/test_api_gateway_authorizer.py index 44c8ed1..94bdc82 100644 
--- a/tests/site_upload/test_api_gateway_authorizer.py +++ b/tests/site_upload/test_api_gateway_authorizer.py @@ -1,14 +1,13 @@ import json import os -import pytest - from contextlib import nullcontext as does_not_raise from unittest import mock +import pytest from pytest_mock import MockerFixture from src.handlers.site_upload.api_gateway_authorizer import lambda_handler -from tests.utils import get_mock_auth, TEST_BUCKET +from tests.utils import TEST_BUCKET, get_mock_auth @pytest.mark.parametrize( diff --git a/tests/site_upload/test_cache_api.py b/tests/site_upload/test_cache_api.py index f07f945..836677b 100644 --- a/tests/site_upload/test_cache_api.py +++ b/tests/site_upload/test_cache_api.py @@ -1,13 +1,12 @@ import os -import pytest - from unittest import mock import awswrangler import pandas +import pytest from src.handlers.site_upload.cache_api import cache_api_handler -from tests.utils import get_mock_data_packages_cache, MOCK_ENV +from tests.utils import MOCK_ENV, get_mock_data_packages_cache def mock_data_packages(*args, **kwargs): diff --git a/tests/site_upload/test_fetch_upload_url.py b/tests/site_upload/test_fetch_upload_url.py index 7bc6e7f..665e79b 100644 --- a/tests/site_upload/test_fetch_upload_url.py +++ b/tests/site_upload/test_fetch_upload_url.py @@ -1,12 +1,18 @@ -import boto3 import json import os -import pytest - from unittest import mock +import boto3 +import pytest + from src.handlers.site_upload.fetch_upload_url import upload_url_handler -from tests.utils import TEST_BUCKET, get_mock_metadata +from tests.utils import ( + EXISTING_DATA_P, + EXISTING_STUDY, + EXISTING_VERSION, + TEST_BUCKET, + get_mock_metadata, +) @pytest.mark.parametrize( @@ -14,9 +20,10 @@ [ ( { - "study": "covid", - "data_package": "encounter", + "study": EXISTING_STUDY, + "data_package": EXISTING_DATA_P, "filename": "encounter.parquet", + "version": EXISTING_VERSION, }, 200, ), diff --git a/tests/site_upload/test_powerset_merge.py b/tests/site_upload/test_powerset_merge.py index 8905087..1b6cca9 100644 --- a/tests/site_upload/test_powerset_merge.py +++ b/tests/site_upload/test_powerset_merge.py @@ -1,98 +1,112 @@ -import boto3 import io import os - from contextlib import nullcontext as does_not_raise +from datetime import datetime, timezone from unittest import mock import awswrangler +import boto3 import pytest - -from datetime import datetime, timezone -from pandas import DataFrame, read_parquet from freezegun import freeze_time +from pandas import DataFrame, read_parquet from src.handlers.shared.enums import BucketPath from src.handlers.shared.functions import read_metadata, write_metadata from src.handlers.site_upload.powerset_merge import ( - powerset_merge_handler, - expand_and_concat_sets, MergeError, + expand_and_concat_sets, + powerset_merge_handler, +) +from tests.utils import ( + EXISTING_DATA_P, + EXISTING_SITE, + EXISTING_STUDY, + EXISTING_VERSION, + ITEM_COUNT, + MOCK_ENV, + NEW_DATA_P, + NEW_SITE, + NEW_STUDY, + NEW_VERSION, + OTHER_SITE, + OTHER_STUDY, + TEST_BUCKET, + get_mock_metadata, ) - -from tests.utils import get_mock_metadata, TEST_BUCKET, ITEM_COUNT, MOCK_ENV - - -SITE_NAME = "princeton_plainsboro_teaching_hospital" -NEW_SITE_NAME = "chicago_hope" -NEW_STUDY_NAME = "new_study" -EXISTING_STUDY_NAME = "study" -DATA_P_NAME = "encounter" @freeze_time("2020-01-01") @pytest.mark.parametrize( - "site,upload_file,upload_path,event_key,archives,status,expected_contents", + "upload_file,upload_path,event_key,archives,status,expected_contents", [ ( # Adding a new data package to a 
site with uploads - f"{SITE_NAME}", "./tests/test_data/count_synthea_patient.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", False, 200, ITEM_COUNT + 3, ), ( # Adding a new data package to a site without uploads - f"{NEW_SITE_NAME}", "./tests/test_data/count_synthea_patient.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{NEW_SITE_NAME}/encounter.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{NEW_SITE_NAME}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/encounter.parquet", False, 200, ITEM_COUNT + 3, ), ( # Updating an existing data package - f"{SITE_NAME}", "./tests/test_data/count_synthea_patient.parquet", - f"/{EXISTING_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", - f"/{EXISTING_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", True, 200, ITEM_COUNT + 2, ), + ( # Updating an existing data package + "./tests/test_data/count_synthea_patient.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{NEW_VERSION}/encounter.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{NEW_VERSION}/encounter.parquet", + True, + 200, + ITEM_COUNT + 4, + ), ( # Invalid parquet file - f"{SITE_NAME}", "./tests/site_upload/test_powerset_merge.py", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/patient.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/patient.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/patient.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/patient.parquet", False, 500, ITEM_COUNT + 1, ), ( # Checks presence of commas in strings does not cause an error - f"{SITE_NAME}", "./tests/test_data/cube_strings_with_commas.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", False, 200, ITEM_COUNT + 3, ), ( # Empty file upload - f"{SITE_NAME}", None, - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", - f"/{NEW_STUDY_NAME}/{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", False, 500, ITEM_COUNT + 1, ), + ( # Race condition - file deleted before job starts + None, + None, + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", + False, + 500, + ITEM_COUNT, + ), ], ) @mock.patch.dict(os.environ, MOCK_ENV) def test_powerset_merge_single_upload( - site, upload_file, upload_path, event_key, @@ -109,7 +123,7 @@ def test_powerset_merge_single_upload( TEST_BUCKET, f"{BucketPath.LATEST.value}{upload_path}", ) - else: + elif upload_path is not None: with io.BytesIO(DataFrame().to_parquet()) as upload_fileobj: 
s3_client.upload_fileobj( upload_fileobj, @@ -133,9 +147,10 @@ def test_powerset_merge_single_upload( # This array looks like: # ['', 'study', 'package', 'site', 'file'] event_list = event_key.split("/") - expected_study = event_list[1] - expected_package = event_list[2] - expected_site = event_list[3] + study = event_list[1] + package = event_list[2] + site = event_list[3] + version = event_list[4] res = powerset_merge_handler(event, {}) assert res["statusCode"] == status s3_res = s3_client.list_objects_v2(Bucket=TEST_BUCKET) @@ -144,34 +159,29 @@ def test_powerset_merge_single_upload( if item["Key"].endswith("aggregate.parquet"): assert item["Key"].startswith(BucketPath.AGGREGATE.value) # This finds the aggregate that was created/updated - ie it skips mocks - if ( - expected_study in item["Key"] - and expected_study in item["Key"] - and status == 200 - ): + if study in item["Key"] and status == 200: agg_df = awswrangler.s3.read_parquet( f"s3://{TEST_BUCKET}/{item['Key']}" ) - assert (agg_df["site"].eq(expected_site)).any() + assert (agg_df["site"].eq(site)).any() elif item["Key"].endswith("aggregate.csv"): assert item["Key"].startswith(BucketPath.CSVAGGREGATE.value) elif item["Key"].endswith("transactions.json"): assert item["Key"].startswith(BucketPath.META.value) metadata = read_metadata(s3_client, TEST_BUCKET) if res["statusCode"] == 200: - study = event_key.split("/")[1] assert ( - metadata[site][study][DATA_P_NAME]["last_aggregation"] + metadata[site][study][EXISTING_DATA_P][version]["last_aggregation"] == datetime.now(timezone.utc).isoformat() ) else: assert ( - metadata["general_hospital"]["study"]["encounter"][ - "last_aggregation" - ] - == get_mock_metadata()["general_hospital"]["study"]["encounter"][ - "last_aggregation" - ] + metadata["princeton_plainsboro_teaching_hospital"]["study"][ + "encounter" + ]["099"]["last_aggregation"] + == get_mock_metadata()["princeton_plainsboro_teaching_hospital"][ + "study" + ]["encounter"]["099"]["last_aggregation"] ) elif item["Key"].startswith(BucketPath.LAST_VALID.value): assert item["Key"] == (f"{BucketPath.LAST_VALID.value}{upload_path}") @@ -214,31 +224,31 @@ def test_powerset_merge_join_study_data( s3_client.upload_file( upload_file, TEST_BUCKET, - f"{BucketPath.LATEST.value}/{EXISTING_STUDY_NAME}/" - f"{DATA_P_NAME}/{NEW_SITE_NAME}/encounter.parquet", + f"{BucketPath.LATEST.value}/{EXISTING_STUDY}/" + f"{EXISTING_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/encounter.parquet", ) s3_client.upload_file( "./tests/test_data/count_synthea_patient.parquet", TEST_BUCKET, - f"{BucketPath.LAST_VALID.value}/{EXISTING_STUDY_NAME}/" - f"{DATA_P_NAME}/{SITE_NAME}/encounter.parquet", + f"{BucketPath.LAST_VALID.value}/{EXISTING_STUDY}/" + f"{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", ) if archives: s3_client.upload_file( "./tests/test_data/count_synthea_patient.parquet", TEST_BUCKET, - f"{BucketPath.LAST_VALID.value}/{EXISTING_STUDY_NAME}/" - f"{DATA_P_NAME}/{NEW_SITE_NAME}/encounter.parquet", + f"{BucketPath.LAST_VALID.value}/{EXISTING_STUDY}/" + f"{EXISTING_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/encounter.parquet", ) event = { "Records": [ { "Sns": { - "Message": f"{BucketPath.LATEST.value}/{EXISTING_STUDY_NAME}" - f"/{DATA_P_NAME}/{NEW_SITE_NAME}/encounter.parquet" + "Message": f"{BucketPath.LATEST.value}/{EXISTING_STUDY}" + f"/{EXISTING_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/encounter.parquet" }, } ] @@ -289,4 +299,4 @@ def test_expand_and_concat(mock_bucket, upload_file, load_empty, raises): TEST_BUCKET, s3_path, ) - 
expand_and_concat_sets(df, f"s3://{TEST_BUCKET}/{s3_path}", EXISTING_STUDY_NAME) + expand_and_concat_sets(df, f"s3://{TEST_BUCKET}/{s3_path}", EXISTING_STUDY) diff --git a/tests/site_upload/test_process_upload.py b/tests/site_upload/test_process_upload.py index 531c185..34ad19f 100644 --- a/tests/site_upload/test_process_upload.py +++ b/tests/site_upload/test_process_upload.py @@ -1,72 +1,84 @@ -import boto3 import os +from datetime import datetime, timezone +import boto3 import pytest - -from datetime import datetime, timezone from freezegun import freeze_time from src.handlers.shared.enums import BucketPath from src.handlers.shared.functions import read_metadata, write_metadata from src.handlers.site_upload.process_upload import process_upload_handler - -from tests.utils import TEST_BUCKET, ITEM_COUNT +from tests.utils import ( + EXISTING_DATA_P, + EXISTING_SITE, + EXISTING_STUDY, + EXISTING_VERSION, + ITEM_COUNT, + NEW_DATA_P, + NEW_SITE, + NEW_STUDY, + NEW_VERSION, + OTHER_SITE, + OTHER_STUDY, + TEST_BUCKET, +) @freeze_time("2020-01-01") @pytest.mark.parametrize( - "site,upload_file,upload_path,event_key,status,expected_contents", + "upload_file,upload_path,event_key,status,expected_contents", [ ( # Adding a new data package to a site with uploads - "princeton_plainsboro_teaching_hospital", "./tests/test_data/cube_simple_example.parquet", - "/covid/encounter/princeton_plainsboro_teaching_hospital/document.parquet", - "/covid/encounter/princeton_plainsboro_teaching_hospital/document.parquet", + f"/{EXISTING_STUDY}/{NEW_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/document.parquet", + f"/{EXISTING_STUDY}/{NEW_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/document.parquet", 200, ITEM_COUNT + 1, ), ( # Adding a new data package to a site without uploads - "chicago_hope", "./tests/test_data/cube_simple_example.parquet", - "/covid/encounter/chicago_hope/document.parquet", - "/covid/encounter/chicago_hope/document.parquet", + f"/{EXISTING_STUDY}/{NEW_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/document.parquet", + f"/{EXISTING_STUDY}/{NEW_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/document.parquet", 200, ITEM_COUNT + 1, ), ( # Updating an existing data package - "princeton_plainsboro_teaching_hospital", "./tests/test_data/cube_simple_example.parquet", - "/covid/encounter/princeton_plainsboro_teaching_hospital/encounter.parquet", - "/covid/encounter/princeton_plainsboro_teaching_hospital/encounter.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/encounter.parquet", + 200, + ITEM_COUNT + 1, + ), + ( # New version of an existing data package + "./tests/test_data/cube_simple_example.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{NEW_VERSION}/encounter.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{NEW_VERSION}/encounter.parquet", 200, ITEM_COUNT + 1, ), ( # Non-parquet file - "princeton_plainsboro_teaching_hospital", "./tests/test_data/cube_simple_example.csv", - "/covid/encounter/princeton_plainsboro_teaching_hospital/document.csv", - "/covid/encounter/princeton_plainsboro_teaching_hospital/document.csv", + f"/{EXISTING_STUDY}/{NEW_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/document.csv", + f"/{EXISTING_STUDY}/{NEW_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/document.csv", 500, ITEM_COUNT + 1, ), ( # S3 event dispatched when file is not present - "princeton_plainsboro_teaching_hospital", None, None, - 
"/covid/encounter/princeton_plainsboro_teaching_hospital/missing.parquet", + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/missing.parquet", 500, ITEM_COUNT, ), - ( # Adding study metadata data package - "princeton_plainsboro_teaching_hospital", + ( # Adding metadata data package "./tests/test_data/cube_simple_example.parquet", ( - "/covid/encounter/princeton_plainsboro_teaching_hospital/" - "document_meta_date.parquet" + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/" + f"{EXISTING_VERSION}/document_meta_date.parquet" ), ( - "/covid/encounter/princeton_plainsboro_teaching_hospital/" - "document_meta_date.parquet" + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/" + f"{EXISTING_VERSION}/document_meta_date.parquet" ), 200, ITEM_COUNT + 1, @@ -74,7 +86,6 @@ ], ) def test_process_upload( - site, upload_file, upload_path, event_key, @@ -111,9 +122,14 @@ def test_process_upload( elif item["Key"].endswith("transactions.json"): assert item["Key"].startswith(BucketPath.META.value) metadata = read_metadata(s3_client, TEST_BUCKET) - if upload_file is not None: + if upload_file is not None and upload_path is not None: + path_params = upload_path.split("/") + study = path_params[1] + data_package = path_params[2] + site = path_params[3] + version = path_params[4] assert ( - metadata[site]["covid"]["encounter"]["last_upload"] + metadata[site][study][data_package][version]["last_upload"] == datetime.now(timezone.utc).isoformat() ) elif item["Key"].startswith(BucketPath.STUDY_META.value): diff --git a/tests/site_upload/test_study_period.py b/tests/site_upload/test_study_period.py index 0f78481..492b36a 100644 --- a/tests/site_upload/test_study_period.py +++ b/tests/site_upload/test_study_period.py @@ -1,78 +1,92 @@ -import boto3 import csv import os +from datetime import datetime, timezone +import boto3 import pytest -from datetime import datetime, timezone from freezegun import freeze_time from src.handlers.shared.enums import BucketPath from src.handlers.shared.functions import read_metadata, write_metadata from src.handlers.site_upload.study_period import study_period_handler - -from tests.utils import get_mock_study_metadata, TEST_BUCKET +from tests.utils import ( + EXISTING_DATA_P, + EXISTING_SITE, + EXISTING_STUDY, + EXISTING_VERSION, + NEW_DATA_P, + NEW_SITE, + NEW_STUDY, + NEW_VERSION, + OTHER_SITE, + OTHER_STUDY, + TEST_BUCKET, + get_mock_study_metadata, +) @freeze_time("2020-01-01") @pytest.mark.parametrize( - "site,upload_file,upload_path,event_key,status,study_key", + "upload_file,upload_path,event_key,status", [ ( # Adding a new study to an existing site - "princeton_plainsboro_teaching_hospital", "./tests/test_data/meta_date.parquet", ( - "/test/test_meta_date/princeton_plainsboro_teaching_hospital/" + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/" "test_meta_date.parquet" ), ( - "/test/test_meta_date/princeton_plainsboro_teaching_hospital/" + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/" "test_meta_date.parquet" ), 200, - "test", ), ( # Adding a new study to a new site - "chicago_hope", "./tests/test_data/meta_date.parquet", - "/test/test_meta_date/chicago_hope/test_meta_date.parquet", - "/test/test_meta_date/chicago_hope/test_meta_date.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/test_meta_date.parquet", + f"/{NEW_STUDY}/{EXISTING_DATA_P}/{NEW_SITE}/{EXISTING_VERSION}/test_meta_date.parquet", + 200, + ), + ( # newer version of existing study + 
"./tests/test_data/meta_date.parquet", + ( + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{NEW_VERSION}/" + "test_meta_date.parquet" + ), + ( + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{NEW_VERSION}/" + "test_meta_date.parquet" + ), 200, - "test", ), ( # updating an existing study - "princeton_plainsboro_teaching_hospital", "./tests/test_data/meta_date.parquet", ( - "/covid/test_meta_date/princeton_plainsboro_teaching_hospital/" + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/" "test_meta_date.parquet" ), ( - "/covid/test_meta_date/princeton_plainsboro_teaching_hospital/" + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/" "test_meta_date.parquet" ), 200, - "covid", ), ( # invalid file - "princeton_plainsboro_teaching_hospital", "./tests/test_data/meta_date.parquet", None, ( - "/covid/test_meta_date/princeton_plainsboro_teaching_hospital/" + f"/{EXISTING_STUDY}/{EXISTING_DATA_P}/{EXISTING_SITE}/{EXISTING_VERSION}/" "wrong.parquet" ), 500, - None, ), ], ) def test_process_upload( - site, upload_file, upload_path, event_key, status, - study_key, mock_bucket, ): s3_client = boto3.client("s3", region_name="us-east-1") @@ -88,10 +102,14 @@ def test_process_upload( res = study_period_handler(event, {}) assert res["statusCode"] == status metadata = read_metadata(s3_client, TEST_BUCKET, meta_type="study_periods") - if study_key is not None: - assert study_key in metadata[site] + if upload_file is not None and upload_path is not None: + path_params = upload_path.split("/") + study = path_params[1] + site = path_params[3] + version = path_params[4] + assert study in metadata[site] assert ( - metadata[site][study_key]["last_data_update"] + metadata[site][study][version]["last_data_update"] == datetime.now(timezone.utc).isoformat() ) with open("./tests/test_data/meta_date.csv", "r") as file: @@ -99,5 +117,7 @@ def test_process_upload( # discarding CSV header row next(reader) row = next(reader) - assert metadata[site][study_key]["earliest_date"] == f"{row[0]}T00:00:00" - assert metadata[site][study_key]["latest_date"] == f"{row[1]}T00:00:00" + assert ( + metadata[site][study][version]["earliest_date"] == f"{row[0]}T00:00:00" + ) + assert metadata[site][study][version]["latest_date"] == f"{row[1]}T00:00:00" diff --git a/tests/utils.py b/tests/utils.py index 6eb0a42..8f479cf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -9,6 +9,17 @@ ITEM_COUNT = 9 DATA_PACKAGE_COUNT = 2 +EXISTING_SITE = "princeton_plainsboro_teaching_hospital" +NEW_SITE = "chicago_hope" +OTHER_SITE = "st_elsewhere" +NEW_STUDY = "new_study" +EXISTING_STUDY = "study" +OTHER_STUDY = "other_study" +EXISTING_DATA_P = "encounter" +NEW_DATA_P = "document" +EXISTING_VERSION = "099" +NEW_VERSION = "100" + # This is a convenience for loading into os.environ with mock.patch.dict. # Other cases should probably use the getter version below. 
 MOCK_ENV = {
@@ -23,37 +34,43 @@ def get_mock_metadata():
     return {
-        "general_hospital": {
-            "study": {
-                "encounter": {
-                    "version": "1.0",
-                    "last_upload": "2023-02-24T15:03:34+00:00",
-                    "last_data_update": "2023-02-24T15:03:40.657583+00:00",
-                    "last_aggregation": "2023-02-24T15:08:07.504595+00:00",
-                    "last_error": None,
-                    "deleted": None,
+        EXISTING_SITE: {
+            EXISTING_STUDY: {
+                EXISTING_DATA_P: {
+                    EXISTING_VERSION: {
+                        "version": "1.0",
+                        "last_upload": "2023-02-24T15:03:34+00:00",
+                        "last_data_update": "2023-02-24T15:03:40.657583+00:00",
+                        "last_aggregation": "2023-02-24T15:08:07.504595+00:00",
+                        "last_error": None,
+                        "deleted": None,
+                    }
                 }
             },
-            "other_study": {
-                "encounter": {
-                    "version": "1.0",
-                    "last_upload": "2023-02-24T15:43:57+00:00",
-                    "last_data_update": "2023-02-24T15:44:03.861574+00:00",
-                    "last_aggregation": "2023-02-24T15:44:03.861574+00:00",
-                    "last_error": None,
-                    "deleted": None,
+            OTHER_STUDY: {
+                EXISTING_DATA_P: {
+                    EXISTING_VERSION: {
+                        "version": "1.0",
+                        "last_upload": "2023-02-24T15:43:57+00:00",
+                        "last_data_update": "2023-02-24T15:44:03.861574+00:00",
+                        "last_aggregation": "2023-02-24T15:44:03.861574+00:00",
+                        "last_error": None,
+                        "deleted": None,
+                    }
                 }
             },
         },
-        "st_elsewhere": {
-            "study": {
-                "encounter": {
-                    "version": "1.0",
-                    "last_upload": "2023-02-24T15:08:06+00:00",
-                    "last_data_update": "2023-02-24T15:08:07.771080+00:00",
-                    "last_aggregation": "2023-02-24T15:08:07.771080+00:00",
-                    "last_error": None,
-                    "deleted": None,
+        OTHER_SITE: {
+            EXISTING_STUDY: {
+                EXISTING_DATA_P: {
+                    EXISTING_VERSION: {
+                        "version": "1.0",
+                        "last_upload": "2023-02-24T15:08:06+00:00",
+                        "last_data_update": "2023-02-24T15:08:07.771080+00:00",
+                        "last_aggregation": "2023-02-24T15:08:07.771080+00:00",
+                        "last_error": None,
+                        "deleted": None,
+                    }
                 }
             }
         },
@@ -62,26 +79,32 @@ def get_mock_study_metadata():
     return {
-        "general_hospital": {
-            "study": {
-                "version": "1.0",
-                "last_data_update": "2023-02-24T15:03:40.657583+00:00",
-                "earliest_data": "2020-02-24T15:03:40.657583+00:00",
-                "latest_data": "2023-02-24T15:03:40.657583+00:00",
+        EXISTING_SITE: {
+            EXISTING_STUDY: {
+                EXISTING_VERSION: {
+                    "version": "1.0",
+                    "last_data_update": "2023-02-24T15:03:40.657583+00:00",
+                    "earliest_data": "2020-02-24T15:03:40.657583+00:00",
+                    "latest_data": "2023-02-24T15:03:40.657583+00:00",
+                }
             },
-            "other_study": {
-                "version": "1.0",
-                "last_data_update": "2023-02-24T15:44:03.861574+00:00",
-                "earliest_data": "2020-02-24T15:03:40.657583+00:00",
-                "latest_data": "2023-02-24T15:03:40.657583+00:00",
+            OTHER_STUDY: {
+                EXISTING_VERSION: {
+                    "version": "1.0",
+                    "last_data_update": "2023-02-24T15:44:03.861574+00:00",
+                    "earliest_data": "2020-02-24T15:03:40.657583+00:00",
+                    "latest_data": "2023-02-24T15:03:40.657583+00:00",
+                }
            },
         },
-        "st_elsewhere": {
-            "study": {
-                "version": "1.0",
-                "last_data_update": "2023-02-24T15:08:07.771080+00:00",
-                "earliest_data": "2020-02-24T15:03:40.657583+00:00",
-                "latest_data": "2023-02-24T15:03:40.657583+00:00",
+        OTHER_SITE: {
+            EXISTING_STUDY: {
+                EXISTING_VERSION: {
+                    "version": "1.0",
+                    "last_data_update": "2023-02-24T15:08:07.771080+00:00",
+                    "earliest_data": "2020-02-24T15:03:40.657583+00:00",
+                    "latest_data": "2023-02-24T15:03:40.657583+00:00",
+                }
             }
         },
     }
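
A note for readers of this changeset: every handler above now assumes a five-part object key (root/study/data_package/site/version/filename) and an extra version level in transactions.json. The sketch below is illustrative only and not part of the patch; the key and timestamp are made up, and plain dict access stands in for the shared read_metadata/update_metadata helpers.

# Illustrative only: mirrors the path convention parsed by process_upload.py,
# powerset_merge.py and study_period.py in this diff; not part of the patch.
s3_key = "latest/study/encounter/princeton_plainsboro_teaching_hospital/099/encounter.parquet"

path_params = s3_key.split("/")
study = path_params[1]         # "study"
data_package = path_params[2]  # "encounter"
site = path_params[3]          # "princeton_plainsboro_teaching_hospital"
version = path_params[4]       # "099"

# transactions.json now nests one level deeper, keyed by version:
metadata = {
    site: {
        study: {
            data_package: {
                version: {"last_upload": "2020-01-01T00:00:00+00:00"}
            }
        }
    }
}
assert metadata[site][study][data_package][version]["last_upload"].startswith("2020")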
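
In the same spirit, a small sketch of the depth heuristic that migrate_versioning.py relies on, using assumed sample data: an un-migrated study_periods.json (site, then study, then date fields) reports depth 3, and inserting the "000" version key adds exactly one level, which is what the == 3 and == 4 guards in the script check for.

# Assumed sample data; the helper mirrors _get_depth() from migrate_versioning.py above.
def get_depth(d):
    if isinstance(d, dict):
        return 1 + (max(map(get_depth, d.values())) if d else 0)
    return 0

unversioned = {"site_a": {"study": {"earliest_date": "2020-01-01"}}}
versioned = {"site_a": {"study": {"000": {"earliest_date": "2020-01-01"}}}}

assert get_depth(unversioned) == 3  # still needs migration
assert get_depth(versioned) == 4    # version level already present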