Prmdr 336 subtask #82

Merged
merged 14 commits into from Oct 13, 2023
42 changes: 41 additions & 1 deletion .github/workflows/lambdas-deploy-feature-to-sandbox.yml
@@ -409,4 +409,44 @@ jobs:
with:
aws_region: ${{ vars.AWS_REGION }}
function_name: ${{ github.event.inputs.sandboxWorkspace}}_LloydGeorgeStitchLambda
zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip


python_deploy_bulk_upload_lambda:
runs-on: ubuntu-latest
environment: development
needs: ["python_lambdas_test"]
strategy:
matrix:
python-version: ["3.11"]

steps:
- name: Checkout
uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Make virtual environment
run: |
make env

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
role-skip-session-tagging: true
aws-region: ${{ vars.AWS_REGION }}

- name: Create release package for Bulk Upload Metadata Lambda
run: |
make lambda_name=bulk_upload_metadata_handler zip

- name: Upload Lambda Function for BulkUploadMetadataLambda
uses: appleboy/lambda-action@master
with:
aws_region: ${{ vars.AWS_REGION }}
function_name: ${{ github.event.inputs.sandboxWorkspace}}_BulkUploadMetadataLambda
zip_file: package_lambdas_bulk_upload_metadata_handler.zip
43 changes: 42 additions & 1 deletion .github/workflows/lambdas-deploy-to-test-manual.yml
@@ -416,4 +416,45 @@ jobs:
with:
aws_region: ${{ vars.AWS_REGION }}
function_name: ${{ vars.BUILD_ENV}}_LloydGeorgeStitchLambda
zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip

python_deploy_bulk_upload_lambda:
runs-on: ubuntu-latest
environment: test
needs: ["python_lambdas_test"]
strategy:
matrix:
python-version: ["3.11"]

steps:
- name: Checkout
uses: actions/checkout@v3
with:
ref: ${{ github.event.inputs.buildBranch}}

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Make virtual environment
run: |
make env

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
role-skip-session-tagging: true
aws-region: ${{ vars.AWS_REGION }}

- name: Create release package for Bulk Upload Metadata Lambda
run: |
make lambda_name=bulk_upload_metadata_handler zip

- name: Upload Lambda Function for BulkUploadMetadataLambda
uses: appleboy/lambda-action@master
with:
aws_region: ${{ vars.AWS_REGION }}
function_name: ${{ vars.BUILD_ENV}}_BulkUploadMetadataLambda
zip_file: package_lambdas_bulk_upload_metadata_handler.zip
52 changes: 51 additions & 1 deletion .github/workflows/lambdas-dev-to-main-ci.yml
@@ -35,6 +35,7 @@ jobs:
authoriser_changed: ${{steps.filter.outputs.authoriser}}
logout_changed: ${{steps.filter.outputs.logout}}
lloyd_george_stitch_changed: ${{steps.filter.outputs.lloyd_george_stitch}}
bulk_upload_metadata_changed: ${{steps.filter.outputs.bulk_upload_metadata}}
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -69,7 +70,9 @@ jobs:
logout:
- 'lambdas/handlers/logout_handler.py'
lloyd_george_stitch:
- 'lambdas/handlers/lloyd_george_record_stitch.py'
- 'lambdas/handlers/lloyd_george_record_stitch_handler.py'
bulk_upload_metadata:
- 'lambdas/handlers/bulk_upload_metadata_handler.py'



@@ -533,3 +536,50 @@ jobs:
aws_region: ${{ vars.AWS_REGION }}
function_name: ${{ vars.BUILD_ENV}}_LloydGeorgeStitchLambda
zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip

python_deploy_bulk_upload_metadata_lambda:
runs-on: ubuntu-latest
environment: development
needs: [ "python_lambdas_test", "identify_changed_functions" ]
if: |
(github.ref == 'refs/heads/main')
&& (needs.identify_changed_functions.outputs.utils_changed == 'true'
|| needs.identify_changed_functions.outputs.enums_changed == 'true'
|| needs.identify_changed_functions.outputs.services_changed == 'true'
|| needs.identify_changed_functions.outputs.models_changed == 'true'
|| needs.identify_changed_functions.outputs.bulk_upload_metadata_changed == 'true'
)
strategy:
matrix:
python-version: ["3.11"]

steps:
- name: Checkout
uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Make virtual environment
run: |
make env

- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
role-skip-session-tagging: true
aws-region: ${{ vars.AWS_REGION }}

- name: Create release package for Bulk Upload Metadata Lambda
run: |
make lambda_name=bulk_upload_metadata_handler zip

- name: Upload Lambda Function for BulkUploadMetadataLambda
uses: appleboy/lambda-action@master
with:
aws_region: ${{ vars.AWS_REGION }}
function_name: ${{ vars.BUILD_ENV}}_BulkUploadMetadataLambda
zip_file: package_lambdas_bulk_upload_metadata_handler.zip
14 changes: 13 additions & 1 deletion .github/workflows/new_base-lambdas-reusable-deploy-all.yml
@@ -91,4 +91,16 @@ jobs:
lambda_aws_name: DocumentManifestByNHSNumberLambda
secrets:
AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}


deploy_bulk_upload_lambda:
name: Deploy metadata_bulk_upload_lambda
uses: ./.github/workflows/new_base-lambdas-reusable-deploy.yml
with:
environment: ${{ inputs.environment}}
python_version: ${{ inputs.python_version }}
build_branch: ${{ inputs.build_branch}}
sandbox: ${{ inputs.sandbox }}
lambda_handler_name: bulk_upload_metadata_handler
lambda_aws_name: BulkUploadMetadataLambda
secrets:
AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}
91 changes: 91 additions & 0 deletions lambdas/handlers/bulk_upload_metadata_handler.py
@@ -0,0 +1,91 @@
import csv
import logging
import os
import tempfile
from typing import Iterable

import pydantic
from botocore.exceptions import ClientError
from models.staging_metadata import (METADATA_FILENAME, NHS_NUMBER_FIELD_NAME,
MetadataFile, StagingMetadata)
from services.s3_service import S3Service
from services.sqs_service import SQSService

logger = logging.getLogger()
logger.setLevel(logging.INFO)


def lambda_handler(_event, _context):
try:
logger.info("Starting metadata reading process")

staging_bucket_name = os.environ["STAGING_STORE_BUCKET_NAME"]
metadata_queue_url = os.environ["METADATA_SQS_QUEUE_URL"]

logger.info("Fetching metadata.csv from bucket")
metadata_file = download_metadata_from_s3(
staging_bucket_name, METADATA_FILENAME
)

logger.info("Parsing bulk upload metadata")
staging_metadata_list = csv_to_staging_metadata(metadata_file)

logger.info("Finished parsing metadata")
send_metadata_to_sqs(staging_metadata_list, metadata_queue_url)

logger.info("Sent bulk upload metadata to sqs queue")
except pydantic.ValidationError as e:
logger.info("Failed to parse metadata.csv")
logger.error(str(e))
except KeyError as e:
logger.info("Failed due to missing key")
logger.error(str(e))
except ClientError as e:
logger.error(str(e))


def download_metadata_from_s3(staging_bucket_name: str, metadata_filename: str):
Contributor: Could this method be moved to an inherited version of the S3Service to keep this file a little cleaner?

s3_service = S3Service()
temp_dir = tempfile.mkdtemp()

local_file_path = os.path.join(temp_dir, metadata_filename)
s3_service.download_file(
s3_bucket_name=staging_bucket_name,
file_key=metadata_filename,
download_path=local_file_path,
)
return local_file_path


def csv_to_staging_metadata(csv_file_path: str) -> list[StagingMetadata]:
patients = {}
with open(csv_file_path, mode="r") as csv_file_handler:
csv_reader: Iterable[dict] = csv.DictReader(csv_file_handler)
for row in csv_reader:
file_metadata = MetadataFile.model_validate(row)
nhs_number = row[NHS_NUMBER_FIELD_NAME]
if nhs_number not in patients:
patients[nhs_number] = [file_metadata]
else:
patients[nhs_number] += [file_metadata]

return [
StagingMetadata(nhs_number=nhs_number, files=patients[nhs_number])
for nhs_number in patients
]


def send_metadata_to_sqs(
staging_metadata_list: list[StagingMetadata], metadata_queue_url: str
) -> None:
sqs_service = SQSService()

for staging_metadata in staging_metadata_list:
Contributor: Could this method be moved to an inherited version of the SQS service to keep this file a little cleaner?

nhs_number = staging_metadata.nhs_number
logger.info(f"Sending metadata for patientId: {nhs_number}")

sqs_service.send_message_with_nhs_number_attr(
queue_url=metadata_queue_url,
message_body=staging_metadata.model_dump_json(by_alias=True),
nhs_number=nhs_number,
)
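
One possible shape for the refactor suggested in the review comment above — moving the download logic into an inherited S3 service — is sketched below. The class and method names are hypothetical, and the sketch assumes only that S3Service exposes download_file with the keyword arguments already used in this handler.

import os
import tempfile

from services.s3_service import S3Service


class MetadataS3Service(S3Service):  # hypothetical subclass name
    """S3 service with a helper for fetching the bulk upload metadata file."""

    def download_metadata(self, staging_bucket_name: str, metadata_filename: str) -> str:
        # Download the metadata file into a fresh temp dir and return the
        # local path, mirroring download_metadata_from_s3 in the handler above.
        temp_dir = tempfile.mkdtemp()
        local_file_path = os.path.join(temp_dir, metadata_filename)
        self.download_file(
            s3_bucket_name=staging_bucket_name,
            file_key=metadata_filename,
            download_path=local_file_path,
        )
        return local_file_path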
34 changes: 34 additions & 0 deletions lambdas/models/staging_metadata.py
@@ -0,0 +1,34 @@
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field

METADATA_FILENAME = "metadata.csv"
NHS_NUMBER_FIELD_NAME = "NHS-NO"


def to_upper_case_with_hyphen(field_name: str) -> str:
return field_name.upper().replace("_", "-")


class MetadataFile(BaseModel):
model_config = ConfigDict(
alias_generator=to_upper_case_with_hyphen, populate_by_name=True
)

file_path: str = Field(alias="FILEPATH")
page_count: str = Field(alias="PAGE COUNT")
gp_practice_code: str
nhs_number: str = Field(exclude=True, alias=NHS_NUMBER_FIELD_NAME)
section: str
sub_section: Optional[str]
scan_date: str
scan_id: str
user_id: str
upload: str


class StagingMetadata(BaseModel):
model_config = ConfigDict(populate_by_name=True)

nhs_number: str = Field(alias=NHS_NUMBER_FIELD_NAME)
files: list[MetadataFile]
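
To illustrate how the alias handling plays out: explicit Field aliases win where given (FILEPATH, PAGE COUNT, NHS-NO), and the alias generator derives the rest by upper-casing and swapping underscores for hyphens (e.g. gp_practice_code becomes GP-PRACTICE-CODE). A quick example with made-up row values:

from models.staging_metadata import MetadataFile, StagingMetadata

row = {
    "FILEPATH": "/1111111111/1of2_Lloyd_George_Record.pdf",
    "PAGE COUNT": "2",
    "GP-PRACTICE-CODE": "Y12345",
    "NHS-NO": "1111111111",
    "SECTION": "LG",
    "SUB-SECTION": "",
    "SCAN-DATE": "03/09/2022",
    "SCAN-ID": "NEC",
    "USER-ID": "NEC",
    "UPLOAD": "04/10/2023",
}

file_metadata = MetadataFile.model_validate(row)
staging = StagingMetadata(nhs_number=row["NHS-NO"], files=[file_metadata])

# nhs_number is excluded from MetadataFile's dump, so it appears only once,
# at the top level of the serialised message body.
print(staging.model_dump_json(by_alias=True))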
24 changes: 24 additions & 0 deletions lambdas/services/sqs_service.py
@@ -0,0 +1,24 @@
import logging

import boto3
from botocore.client import Config as BotoConfig

logger = logging.getLogger()
logger.setLevel(logging.INFO)


class SQSService:
def __init__(self):
config = BotoConfig(retries={"max_attempts": 3, "mode": "standard"})
self.client = boto3.client("sqs", config=config)

def send_message_with_nhs_number_attr(
self, queue_url: str, message_body: str, nhs_number: str
):
self.client.send_message(
QueueUrl=queue_url,
MessageAttributes={
"NhsNumber": {"DataType": "String", "StringValue": nhs_number},
},
MessageBody=message_body,
)
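
The review comment on the handler suggests moving send_metadata_to_sqs into an inherited version of this service. A hedged sketch of that idea, with hypothetical class and method names, reusing only the send_message_with_nhs_number_attr signature defined above:

import logging

from models.staging_metadata import StagingMetadata
from services.sqs_service import SQSService

logger = logging.getLogger()


class MetadataSQSService(SQSService):  # hypothetical subclass name
    """SQS service with a helper for publishing staging metadata messages."""

    def send_all_metadata(
        self, staging_metadata_list: list[StagingMetadata], metadata_queue_url: str
    ) -> None:
        # Same behaviour as send_metadata_to_sqs in the handler: one message
        # per patient, tagged with the NhsNumber message attribute.
        for staging_metadata in staging_metadata_list:
            nhs_number = staging_metadata.nhs_number
            logger.info(f"Sending metadata for patientId: {nhs_number}")
            self.send_message_with_nhs_number_attr(
                queue_url=metadata_queue_url,
                message_body=staging_metadata.model_dump_json(by_alias=True),
                nhs_number=nhs_number,
            )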
9 changes: 9 additions & 0 deletions lambdas/tests/unit/conftest.py
@@ -16,12 +16,17 @@
MOCK_ZIP_OUTPUT_BUCKET_ENV_NAME = "ZIPPED_STORE_BUCKET_NAME"
MOCK_ZIP_TRACE_TABLE_ENV_NAME = "ZIPPED_STORE_DYNAMODB_NAME"

MOCK_LG_STAGING_STORE_BUCKET_ENV_NAME = "STAGING_STORE_BUCKET_NAME"
MOCK_LG_METADATA_SQS_QUEUE_ENV_NAME = "METADATA_SQS_QUEUE_URL"

MOCK_ARF_TABLE_NAME = "test_arf_dynamoDB_table"
MOCK_LG_TABLE_NAME = "test_lg_dynamoDB_table"
MOCK_ARF_BUCKET = "test_arf_s3_bucket"
MOCK_LG_BUCKET = "test_lg_s3_bucket"
MOCK_ZIP_OUTPUT_BUCKET = "test_s3_output_bucket"
MOCK_ZIP_TRACE_TABLE = "test_zip_table"
MOCK_LG_STAGING_STORE_BUCKET = "test_staging_bulk_store"
MOCK_LG_METADATA_SQS_QUEUE = "test_bulk_upload_metadata_queue"

TEST_NHS_NUMBER = "1111111111"
TEST_OBJECT_KEY = "1234-4567-8912-HSDF-TEST"
@@ -43,3 +48,7 @@ def set_env(monkeypatch):
)
monkeypatch.setenv(MOCK_ZIP_OUTPUT_BUCKET_ENV_NAME, MOCK_ZIP_OUTPUT_BUCKET)
monkeypatch.setenv(MOCK_ZIP_TRACE_TABLE_ENV_NAME, MOCK_ZIP_TRACE_TABLE)
monkeypatch.setenv(
MOCK_LG_STAGING_STORE_BUCKET_ENV_NAME, MOCK_LG_STAGING_STORE_BUCKET
)
monkeypatch.setenv(MOCK_LG_METADATA_SQS_QUEUE_ENV_NAME, MOCK_LG_METADATA_SQS_QUEUE)
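
With these fixtures in place, the pure CSV-parsing step needs no AWS mocking at all. A minimal test sketch, assuming the handler module is importable as handlers.bulk_upload_metadata_handler (consistent with the models/services imports above) and using pytest's built-in tmp_path fixture:

import csv

from handlers.bulk_upload_metadata_handler import csv_to_staging_metadata

FIELDNAMES = [
    "FILEPATH", "PAGE COUNT", "GP-PRACTICE-CODE", "NHS-NO", "SECTION",
    "SUB-SECTION", "SCAN-DATE", "SCAN-ID", "USER-ID", "UPLOAD",
]


def make_row(filepath: str, nhs_number: str) -> dict:
    # Made-up values; only the headers need to match the MetadataFile aliases.
    row = dict.fromkeys(FIELDNAMES, "test")
    row["FILEPATH"] = filepath
    row["NHS-NO"] = nhs_number
    return row


def test_csv_to_staging_metadata_groups_files_by_nhs_number(tmp_path):
    csv_path = tmp_path / "metadata.csv"
    with open(csv_path, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerow(make_row("1of2.pdf", "1111111111"))
        writer.writerow(make_row("2of2.pdf", "1111111111"))

    result = csv_to_staging_metadata(str(csv_path))

    # Both rows share one NHS number, so they collapse into one StagingMetadata.
    assert len(result) == 1
    assert result[0].nhs_number == "1111111111"
    assert [f.file_path for f in result[0].files] == ["1of2.pdf", "2of2.pdf"]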