Commit d06c1af (1 parent: 2dea9d6)

* Ruff
* PR feedback, removed unused pylint excepts

Showing 36 changed files with 981 additions and 432 deletions.
File: .pre-commit-config.yaml
```diff
@@ -1,17 +1,10 @@
+default_install_hook_types: [pre-commit, pre-push]
 repos:
-  - repo: https://github.com/psf/black
-    # this version is synced with the black mentioned in .github/workflows/ci.yml
-    rev: 22.12.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.2.1
     hooks:
-      - id: black
-        entry: bash -c 'black "$@"; git add -u' --
-        # It is recommended to specify the latest version of Python
-        # supported by your project here, or alternatively use
-        # pre-commit's default_language_version, see
-        # https://pre-commit.com/#top_level-default_language_version
-        language_version: python3.9
-  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-        args: ["--profile", "black", "--filter-files"]
+      - name: Ruff formatting
+        id: ruff-format
+      - name: Ruff linting
+        id: ruff
+        stages: [pre-push]
```
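With this change, Black and isort are replaced by Ruff's formatter and linter. Note the staging: because `default_install_hook_types` covers both `pre-commit` and `pre-push`, the `ruff-format` hook runs at both stages, while the `ruff` lint hook is restricted to pushes by its `stages: [pre-push]` entry.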
New file (86 additions): a migration script that backfills a `column_types` metadata document for existing aggregates.

```python
"""Adds a new metadata type, column_types"""

import argparse
import io
import json

import boto3
import pandas
from rich import progress


def get_csv_column_datatypes(dtypes):
    """Helper for generating column types for the dashboard API"""
    column_dict = {}
    for column in dtypes.index:
        if column.endswith("year"):
            column_dict[column] = "year"
        elif column.endswith("month"):
            column_dict[column] = "month"
        elif column.endswith("week"):
            column_dict[column] = "week"
        elif column.endswith("day") or str(dtypes[column]) == "datetime64":
            column_dict[column] = "day"
        elif "cnt" in column or str(dtypes[column]) in (
            "Int8",
            "Int16",
            "Int32",
            "Int64",
            "UInt8",
            "UInt16",
            "UInt32",
            "UInt64",
        ):
            column_dict[column] = "integer"
        elif str(dtypes[column]) in ("Float32", "Float64"):
            column_dict[column] = "float"
        elif str(dtypes[column]) == "boolean":
            column_dict[column] = "boolean"
        else:
            column_dict[column] = "string"
    return column_dict


def _put_s3_data(key: str, bucket_name: str, client, data: dict) -> None:
    """Convenience function for writing a dict to S3"""
    b_data = io.BytesIO(json.dumps(data).encode())
    client.upload_fileobj(Bucket=bucket_name, Key=key, Fileobj=b_data)


def create_column_type_metadata(bucket: str):
    """Creates a new metadata dict for column types.

    By design, this replaces an existing column type dict if one already exists.
    """
    client = boto3.client("s3")
    res = client.list_objects_v2(Bucket=bucket, Prefix="aggregates/")
    contents = res["Contents"]
    output = {}
    for resource in progress.track(contents):
        # Keys look like aggregates/{study}/{study}__{subscription}/{version}/...
        dirs = resource["Key"].split("/")
        study = dirs[1]
        subscription = dirs[2].split("__")[1]
        version = dirs[3]
        bytes_buffer = io.BytesIO()
        client.download_fileobj(
            Bucket=bucket, Key=resource["Key"], Fileobj=bytes_buffer
        )
        df = pandas.read_parquet(bytes_buffer)
        type_dict = get_csv_column_datatypes(df.dtypes)
        filename = f"{resource['Key'].split('/')[-1].split('.')[0]}.csv"
        output.setdefault(study, {})
        output[study].setdefault(subscription, {})
        output[study][subscription].setdefault(version, {})
        output[study][subscription][version]["columns"] = type_dict
        output[study][subscription][version]["filename"] = filename
    _put_s3_data("metadata/column_types.json", bucket, client, output)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Creates column types for existing aggregates."
    )
    parser.add_argument("-b", "--bucket", help="bucket name")
    args = parser.parse_args()
    create_column_type_metadata(args.bucket)
```
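For reference, the script produces a single `metadata/column_types.json` document keyed by study, subscription, and version. A minimal sketch of the resulting shape (the study and subscription names below are hypothetical, not values from this commit):

```python
# Illustrative shape only; "covid" and "count_symptom" are hypothetical names.
column_types = {
    "covid": {
        "count_symptom": {
            "099": {
                "columns": {
                    "recorded_week": "week",  # "*week" column names map to "week"
                    "cnt": "integer",  # "cnt" columns and int dtypes map to "integer"
                    "symptom_display": "string",
                },
                "filename": "covid__count_symptom__099.csv",
            }
        }
    }
}
```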
New file (111 additions): dashboard API handlers that serve presigned CSV download URLs and CSV listings.
```python
import os

import boto3
import botocore

from src.handlers.shared import decorators, enums, functions


def _format_key(
    s3_client,
    s3_bucket_name: str,
    study: str,
    subscription: str,
    version: str,
    filename: str,
    site: str | None = None,
):
    """Creates an S3 key from url params"""
    if site is not None:
        key = f"last_valid/{study}/{study}__{subscription}/{site}/{version}/{filename}"
    else:
        key = f"csv_aggregates/{study}/{study}__{subscription}/{version}/{filename}"
    try:
        s3_client.head_object(Bucket=s3_bucket_name, Key=key)
        return key
    except botocore.exceptions.ClientError as e:
        raise OSError(f"No object found at key {key}") from e


def _get_column_types(
    s3_client,
    s3_bucket_name: str,
    study: str,
    subscription: str,
    version: str,
    **kwargs,
) -> dict:
    """Gets column types from the metadata store for a given subscription"""
    types_metadata = functions.read_metadata(
        s3_client,
        s3_bucket_name,
        meta_type=enums.JsonFilename.COLUMN_TYPES.value,
    )
    try:
        return types_metadata[study][subscription][version][
            enums.ColumnTypesKeys.COLUMNS.value
        ]
    except KeyError:
        return {}


@decorators.generic_error_handler(msg="Error retrieving chart data")
def get_csv_handler(event, context):
    """Manages event from dashboard API call and creates a temporary URL"""
    del context
    s3_bucket_name = os.environ.get("BUCKET_NAME")
    s3_client = boto3.client("s3")
    key = _format_key(s3_client, s3_bucket_name, **event["pathParameters"])
    types = _get_column_types(s3_client, s3_bucket_name, **event["pathParameters"])
    presign_url = s3_client.generate_presigned_url(
        "get_object",
        Params={
            "Bucket": s3_bucket_name,
            "Key": key,
            "ResponseContentType": "text/csv",
        },
        ExpiresIn=600,
    )
    extra_headers = {
        "Location": presign_url,
        "x-column-names": ",".join(types.keys()),
        "x-column-types": ",".join(types.values()),
        # TODO: add x-column-descriptions once a source for column descriptions
        # has been established
    }
    return functions.http_response(302, "", extra_headers=extra_headers)


@decorators.generic_error_handler(msg="Error retrieving csv data")
def get_csv_list_handler(event, context):
    """Manages event from dashboard API call and lists available CSV files"""
    del context
    s3_bucket_name = os.environ.get("BUCKET_NAME")
    s3_client = boto3.client("s3")
    if event["path"].startswith("/last_valid"):
        key_prefix = "last_valid"
        url_prefix = "last_valid"
    elif event["path"].startswith("/aggregates"):
        key_prefix = "csv_aggregates"
        url_prefix = "aggregates"
    else:
        raise ValueError("Unexpected url encountered")
    s3_objs = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=key_prefix)
    urls = []
    if s3_objs["KeyCount"] == 0:
        return functions.http_response(200, urls)
    for obj in s3_objs["Contents"]:
        # Keys look like {prefix}/{study}/{study}__{subscription}/[{site}/]{version}/{filename}
        key_parts = obj["Key"].split("/")
        study = key_parts[1]
        subscription = key_parts[2].split("__")[1]
        version = key_parts[-2]
        filename = key_parts[-1]
        site = key_parts[3] if url_prefix == "last_valid" else None
        url_parts = [url_prefix, study, subscription, version, filename]
        if url_prefix == "last_valid":
            url_parts.insert(3, site)
        urls.append("/".join(url_parts))
    return functions.http_response(200, urls)
```
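To sketch how a dashboard client would consume `get_csv_handler`, the example below uses `requests` against the same URL layout that `get_csv_list_handler` returns. The host is a hypothetical stand-in for the deployed API Gateway endpoint, and the study and subscription names are made up:

```python
import requests

# Hypothetical host; the real value comes from the deployed API Gateway stage.
API_BASE = "https://example.execute-api.us-east-1.amazonaws.com"

# Fetch without following the redirect so the custom column headers are visible.
resp = requests.get(
    f"{API_BASE}/aggregates/covid/count_symptom/099/covid__count_symptom__099.csv",
    allow_redirects=False,
    timeout=30,
)
assert resp.status_code == 302
names = resp.headers["x-column-names"].split(",")
types = resp.headers["x-column-types"].split(",")
# The Location header is a presigned S3 URL, valid for 600 seconds.
csv_text = requests.get(resp.headers["Location"], timeout=30).text
```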