PR feedback
dogversioning committed Feb 26, 2024
1 parent 37050f4 commit fa1b765
Showing 6 changed files with 19 additions and 17 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -10,7 +10,7 @@ dependencies= [
"awswrangler >=3.5, <4",
"boto3",
"pandas >=2, <3",
"rich"
"rich",
]
authors = [
{ name="Matt Garber", email="matthew.garber@childrens.harvard.edu" },
7 changes: 3 additions & 4 deletions src/handlers/dashboard/get_csv.py
@@ -6,7 +6,7 @@
from src.handlers.shared import decorators, enums, functions


-def _format_key(
+def _format_and_validate_key(
s3_client,
s3_bucket_name: str,
study: str,
@@ -20,7 +20,6 @@ def _format_key(
key = f"last_valid/{study}/{study}__{subscription}/{site}/{version}/{filename}"
else:
key = f"csv_aggregates/{study}/{study}__{subscription}/{version}/{filename}"
-    s3_client.list_objects_v2(Bucket=s3_bucket_name)
try:
s3_client.head_object(Bucket=s3_bucket_name, Key=key)
return key
@@ -56,7 +55,7 @@ def get_csv_handler(event, context):
del context
s3_bucket_name = os.environ.get("BUCKET_NAME")
s3_client = boto3.client("s3")
-    key = _format_key(s3_client, s3_bucket_name, **event["pathParameters"])
+    key = _format_and_validate_key(s3_client, s3_bucket_name, **event["pathParameters"])
types = _get_column_types(s3_client, s3_bucket_name, **event["pathParameters"])
presign_url = s3_client.generate_presigned_url(
"get_object",
@@ -99,7 +98,7 @@ def get_csv_list_handler(event, context):
return functions.http_response(200, urls)
while True:
for obj in s3_objs["Contents"]:
if obj["Key"].endswith("parquet"):
if not obj["Key"].endswith(".csv"):
continue
key_parts = obj["Key"].split("/")
study = key_parts[1]
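
A note on the get_csv.py change above: the bare list_objects_v2 call is dropped because head_object on its own confirms the key exists. A minimal sketch of that validation pattern, with the except branch (elided in this diff) filled in as an assumption and a hypothetical bucket/key in the usage line:

import boto3
import botocore.exceptions


def key_exists(s3_client, bucket: str, key: str) -> bool:
    """Check object existence with a metadata-only HEAD request."""
    try:
        # head_object raises ClientError (with a 404 code) when the key is
        # absent, so no separate list_objects_v2 call is needed
        s3_client.head_object(Bucket=bucket, Key=key)
        return True
    except botocore.exceptions.ClientError:
        return False


if __name__ == "__main__":
    print(key_exists(boto3.client("s3"), "example-bucket", "example/key.csv"))
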
7 changes: 5 additions & 2 deletions src/handlers/shared/functions.py
@@ -98,7 +98,10 @@ def update_metadata(
It's assumed that, other than the version/column/type fields, every item in one
-    of these metadata dicts is a ISO date string corresponding to an S3 event timestamp
+    of these metadata dicts is a ISO date string corresponding to an S3 event timestamp.
+    TODO: if we have other cases of non-datetime metadata, consider breaking this
+    function into two, one for updating datetimes and one for updating values
"""
check_meta_type(meta_type)
match meta_type:
@@ -228,7 +231,7 @@ def get_csv_column_datatypes(dtypes):
column_dict[column] = "week"
elif column.endswith("day") or str(dtypes[column]) == "datetime64":
column_dict[column] = "day"
elif "cnt" in column or str(dtypes[column]) in (
elif column.startswith("cnt") or str(dtypes[column]) in (
"Int8",
"Int16",
"Int32",
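
On the get_csv_column_datatypes change: switching from a substring test to a prefix test tightens the integer classification. A minimal sketch, assuming count columns follow a cnt prefix naming convention; the column names below are hypothetical:

def infer_column_type(column: str, dtype: str) -> str:
    """Mirror the prefix-based check above: count columns and nullable ints map to integer."""
    if column.startswith("cnt") or dtype in ("Int8", "Int16", "Int32", "Int64"):
        return "integer"
    return "string"


# A column named "cnt_subject" is still classified as a count column...
print(infer_column_type("cnt_subject", "object"))  # integer
# ...while a hypothetical name that merely contains "cnt" no longer is
print(infer_column_type("discnt_code", "object"))  # string
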
9 changes: 5 additions & 4 deletions src/handlers/site_upload/powerset_merge.py
@@ -107,6 +107,9 @@ def update_local_metadata(
meta_type: str | None = enums.JsonFilename.TRANSACTIONS.value,
):
"""convenience wrapper for update_metadata"""
+        # We are excluding COLUMN_TYPES explicitly from this first check because,
+        # by design, it should never have a site field in it - the column types
+        # are tied to the study version, not a specific site's data
if site is None and meta_type != enums.JsonFilename.COLUMN_TYPES.value:
site = self.site
if metadata is None:
@@ -126,10 +129,8 @@ def write_local_metadata(
self, metadata: dict | None = None, meta_type: str | None = None
):
"""convenience wrapper for write_metadata"""
-        if metadata is None:
-            metadata = self.metadata
-        if meta_type is None:
-            meta_type = enums.JsonFilename.TRANSACTIONS.value
+        metadata = metadata or self.metadata
+        meta_type = meta_type or enums.JsonFilename.TRANSACTIONS.value
functions.write_metadata(
s3_client=self.s3_client,
s3_bucket_name=self.s3_bucket_name,
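
One behavioral detail of the write_local_metadata simplification: the or idiom falls back on any falsy value, not only None, so an explicitly passed empty dict is also replaced by the default. A standalone sketch of that behavior:

def resolve_metadata(metadata: dict | None, default: dict) -> dict:
    # `or` substitutes the default for None *and* for an empty dict,
    # a slightly broader condition than the `is None` checks it replaces
    return metadata or default


print(resolve_metadata(None, {"study": "example"}))     # {'study': 'example'}
print(resolve_metadata({}, {"study": "example"}))       # {'study': 'example'}
print(resolve_metadata({"a": 1}, {"study": "example"})) # {'a': 1}
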
1 change: 1 addition & 0 deletions tests/dashboard/test_get_csv.py
@@ -109,6 +109,7 @@ def test_get_csv(mock_bucket, params, status, expected):
assert (
res["headers"]["x-column-types"] == "integer,string,integer,string,string"
)
assert res["headers"]["x-column-names"] == "cnt,gender,age,race_display,site"
assert res["headers"]["Location"].startswith(url)


10 changes: 4 additions & 6 deletions tests/site_upload/test_powerset_merge.py
@@ -219,12 +219,10 @@ def test_powerset_merge_single_upload(
s3_client, TEST_BUCKET, meta_type=enums.JsonFilename.COLUMN_TYPES.value
)
if res["statusCode"] == 200:
-        assert (
-            metadata[study][data_package.split("__")[1]][version][
-                "last_data_update"
-            ]
-            == datetime.now(timezone.utc).isoformat()
-        )
+        last_update = metadata[study][data_package.split("__")[1]][version][
+            "last_data_update"
+        ]
+        assert last_update == datetime.now(timezone.utc).isoformat()

else:
assert (
