diff --git a/.github/workflows/lambdas-deploy-feature-to-sandbox.yml b/.github/workflows/lambdas-deploy-feature-to-sandbox.yml
index 44a18727c..3f237de07 100644
--- a/.github/workflows/lambdas-deploy-feature-to-sandbox.yml
+++ b/.github/workflows/lambdas-deploy-feature-to-sandbox.yml
@@ -409,4 +409,44 @@ jobs:
       with:
         aws_region: ${{ vars.AWS_REGION }}
         function_name: ${{ github.event.inputs.sandboxWorkspace}}_LloydGeorgeStitchLambda
-        zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip
\ No newline at end of file
+        zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip
+
+
+  python_deploy_bulk_upload_lambda:
+    runs-on: ubuntu-latest
+    environment: development
+    needs: ["python_lambdas_test"]
+    strategy:
+      matrix:
+        python-version: ["3.11"]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Make virtual environment
+        run: |
+          make env
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
+          role-skip-session-tagging: true
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: Create release package for Bulk Upload Metadata Lambda
+        run: |
+          make lambda_name=bulk_upload_metadata_handler zip
+
+      - name: Upload Lambda Function for BulkUploadMetadataLambda
+        uses: appleboy/lambda-action@master
+        with:
+          aws_region: ${{ vars.AWS_REGION }}
+          function_name: ${{ github.event.inputs.sandboxWorkspace}}_BulkUploadMetadataLambda
+          zip_file: package_lambdas_bulk_upload_metadata_handler.zip
\ No newline at end of file
diff --git a/.github/workflows/lambdas-deploy-to-test-manual.yml b/.github/workflows/lambdas-deploy-to-test-manual.yml
index 94bda7e6a..2cd7be1d8 100644
--- a/.github/workflows/lambdas-deploy-to-test-manual.yml
+++ b/.github/workflows/lambdas-deploy-to-test-manual.yml
@@ -416,4 +416,45 @@ jobs:
       with:
         aws_region: ${{ vars.AWS_REGION }}
         function_name: ${{ vars.BUILD_ENV}}_LloydGeorgeStitchLambda
-        zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip
\ No newline at end of file
+        zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip
+
+  python_deploy_bulk_upload_lambda:
+    runs-on: ubuntu-latest
+    environment: test
+    needs: ["python_lambdas_test"]
+    strategy:
+      matrix:
+        python-version: ["3.11"]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.inputs.buildBranch}}
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Make virtual environment
+        run: |
+          make env
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
+          role-skip-session-tagging: true
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: Create release package for Bulk Upload Metadata Lambda
+        run: |
+          make lambda_name=bulk_upload_metadata_handler zip
+
+      - name: Upload Lambda Function for BulkUploadMetadataLambda
+        uses: appleboy/lambda-action@master
+        with:
+          aws_region: ${{ vars.AWS_REGION }}
+          function_name: ${{ vars.BUILD_ENV}}_BulkUploadMetadataLambda
+          zip_file: package_lambdas_bulk_upload_metadata_handler.zip
\ No newline at end of file
diff --git a/.github/workflows/lambdas-dev-to-main-ci.yml b/.github/workflows/lambdas-dev-to-main-ci.yml
index 7ff738768..59fdf53f2 100644
--- a/.github/workflows/lambdas-dev-to-main-ci.yml
+++ b/.github/workflows/lambdas-dev-to-main-ci.yml
@@ -35,6 +35,7 @@ jobs:
       authoriser_changed: ${{steps.filter.outputs.authoriser}}
       logout_changed: ${{steps.filter.outputs.logout}}
       lloyd_george_stitch_changed: ${{steps.filter.outputs.lloyd_george_stitch}}
+      bulk_upload_metadata_changed: ${{steps.filter.outputs.bulk_upload_metadata}}
     steps:
       - name: Checkout
        uses: actions/checkout@v3
@@ -69,7 +70,9 @@
             logout:
               - 'lambdas/handlers/logout_handler.py'
             lloyd_george_stitch:
-              - 'lambdas/handlers/lloyd_george_record_stitch.py'
+              - 'lambdas/handlers/lloyd_george_record_stitch_handler.py'
+            bulk_upload_metadata:
+              - 'lambdas/handlers/bulk_upload_metadata_handler.py'
@@ -533,3 +536,50 @@ jobs:
         aws_region: ${{ vars.AWS_REGION }}
         function_name: ${{ vars.BUILD_ENV}}_LloydGeorgeStitchLambda
         zip_file: package_lambdas_lloyd_george_record_stitch_handler.zip
+
+  python_deploy_bulk_upload_metadata_lambda:
+    runs-on: ubuntu-latest
+    environment: development
+    needs: [ "python_lambdas_test", "identify_changed_functions" ]
+    if: |
+      (github.ref == 'refs/heads/main')
+      && (needs.identify_changed_functions.outputs.utils_changed == 'true'
+      || needs.identify_changed_functions.outputs.enums_changed == 'true'
+      || needs.identify_changed_functions.outputs.services_changed == 'true'
+      || needs.identify_changed_functions.outputs.models_changed == 'true'
+      || needs.identify_changed_functions.outputs.bulk_upload_metadata_changed == 'true'
+      )
+    strategy:
+      matrix:
+        python-version: ["3.11"]
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Make virtual environment
+        run: |
+          make env
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v2
+        with:
+          role-to-assume: ${{ secrets.AWS_ASSUME_ROLE }}
+          role-skip-session-tagging: true
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: Create release package for Bulk Upload Metadata Lambda
+        run: |
+          make lambda_name=bulk_upload_metadata_handler zip
+
+      - name: Upload Lambda Function for BulkUploadMetadataLambda
+        uses: appleboy/lambda-action@master
+        with:
+          aws_region: ${{ vars.AWS_REGION }}
+          function_name: ${{ vars.BUILD_ENV}}_BulkUploadMetadataLambda
+          zip_file: package_lambdas_bulk_upload_metadata_handler.zip
diff --git a/.github/workflows/new_base-lambdas-reusable-deploy-all.yml b/.github/workflows/new_base-lambdas-reusable-deploy-all.yml
index 189d26090..5674ce545 100644
--- a/.github/workflows/new_base-lambdas-reusable-deploy-all.yml
+++ b/.github/workflows/new_base-lambdas-reusable-deploy-all.yml
@@ -91,4 +91,16 @@ jobs:
       lambda_aws_name: DocumentManifestByNHSNumberLambda
     secrets:
       AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}
-
\ No newline at end of file
+
+  deploy_bulk_upload_lambda:
+    name: Deploy bulk_upload_metadata_lambda
+    uses: ./.github/workflows/new_base-lambdas-reusable-deploy.yml
+    with:
+      environment: ${{ inputs.environment}}
+      python_version: ${{ inputs.python_version }}
+      build_branch: ${{ inputs.build_branch}}
+      sandbox: ${{ inputs.sandbox }}
+      lambda_handler_name: bulk_upload_metadata_handler
+      lambda_aws_name: BulkUploadMetadataLambda
+    secrets:
+      AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}
\ No newline at end of file
diff --git a/lambdas/handlers/bulk_upload_metadata_handler.py b/lambdas/handlers/bulk_upload_metadata_handler.py
new file mode 100644
index 000000000..c6ef34f40
--- /dev/null
+++ b/lambdas/handlers/bulk_upload_metadata_handler.py
@@ -0,0 +1,91 @@
+import csv
+import logging
+import os
+import tempfile
+from typing import Iterable
+
+import pydantic
+from botocore.exceptions import ClientError
+from models.staging_metadata import (METADATA_FILENAME, NHS_NUMBER_FIELD_NAME,
+                                     MetadataFile, StagingMetadata)
+from services.s3_service import S3Service
+from services.sqs_service import SQSService
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+def lambda_handler(_event, _context):
+    try:
+        logger.info("Starting metadata reading process")
+
+        staging_bucket_name = os.environ["STAGING_STORE_BUCKET_NAME"]
+        metadata_queue_url = os.environ["METADATA_SQS_QUEUE_URL"]
+
+        logger.info("Fetching metadata.csv from bucket")
+        metadata_file = download_metadata_from_s3(
+            staging_bucket_name, METADATA_FILENAME
+        )
+
+        logger.info("Parsing bulk upload metadata")
+        staging_metadata_list = csv_to_staging_metadata(metadata_file)
+
+        logger.info("Finished parsing metadata")
+        send_metadata_to_sqs(staging_metadata_list, metadata_queue_url)
+
+        logger.info("Sent bulk upload metadata to sqs queue")
+    except pydantic.ValidationError as e:
+        logger.info("Failed to parse metadata.csv")
+        logger.error(str(e))
+    except KeyError as e:
+        logger.info("Failed due to missing key")
+        logger.error(str(e))
+    except ClientError as e:
+        logger.error(str(e))
+
+
+def download_metadata_from_s3(staging_bucket_name: str, metadata_filename: str):
+    s3_service = S3Service()
+    temp_dir = tempfile.mkdtemp()
+
+    local_file_path = os.path.join(temp_dir, metadata_filename)
+    s3_service.download_file(
+        s3_bucket_name=staging_bucket_name,
+        file_key=metadata_filename,
+        download_path=local_file_path,
+    )
+    return local_file_path
+
+
+def csv_to_staging_metadata(csv_file_path: str) -> list[StagingMetadata]:
+    patients = {}
+    with open(csv_file_path, mode="r") as csv_file_handler:
+        csv_reader: Iterable[dict] = csv.DictReader(csv_file_handler)
+        for row in csv_reader:
+            file_metadata = MetadataFile.model_validate(row)
+            nhs_number = row[NHS_NUMBER_FIELD_NAME]
+            if nhs_number not in patients:
+                patients[nhs_number] = [file_metadata]
+            else:
+                patients[nhs_number] += [file_metadata]
+
+    return [
+        StagingMetadata(nhs_number=nhs_number, files=patients[nhs_number])
+        for nhs_number in patients
+    ]
+
+
+def send_metadata_to_sqs(
+    staging_metadata_list: list[StagingMetadata], metadata_queue_url: str
+) -> None:
+    sqs_service = SQSService()
+
+    for staging_metadata in staging_metadata_list:
+        nhs_number = staging_metadata.nhs_number
+        logger.info(f"Sending metadata for patientId: {nhs_number}")
+
+        sqs_service.send_message_with_nhs_number_attr(
+            queue_url=metadata_queue_url,
+            message_body=staging_metadata.model_dump_json(by_alias=True),
+            nhs_number=nhs_number,
+        )
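
Reviewer note: a minimal sketch of smoke-running the new handler outside Lambda. The two environment variable names come from the handler above; the bucket and queue values are illustrative assumptions, and real AWS credentials (or moto-style mocks) would be needed for the S3 and SQS calls to succeed.

```python
# Sketch only: exercise lambda_handler locally. The bucket/queue values are
# placeholders (assumptions), not taken from this diff.
import os

os.environ["STAGING_STORE_BUCKET_NAME"] = "example-staging-bucket"  # assumption
os.environ["METADATA_SQS_QUEUE_URL"] = "https://sqs.example/metadata-queue"  # assumption

from handlers.bulk_upload_metadata_handler import lambda_handler

# The handler ignores its event and context, so None suffices for a smoke run.
lambda_handler(None, None)
```
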
diff --git a/lambdas/models/staging_metadata.py b/lambdas/models/staging_metadata.py
new file mode 100644
index 000000000..c624eb4c6
--- /dev/null
+++ b/lambdas/models/staging_metadata.py
@@ -0,0 +1,34 @@
+from typing import Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+METADATA_FILENAME = "metadata.csv"
+NHS_NUMBER_FIELD_NAME = "NHS-NO"
+
+
+def to_upper_case_with_hyphen(field_name: str) -> str:
+    return field_name.upper().replace("_", "-")
+
+
+class MetadataFile(BaseModel):
+    model_config = ConfigDict(
+        alias_generator=to_upper_case_with_hyphen, populate_by_name=True
+    )
+
+    file_path: str = Field(alias="FILEPATH")
+    page_count: str = Field(alias="PAGE COUNT")
+    gp_practice_code: str
+    nhs_number: str = Field(exclude=True, alias=NHS_NUMBER_FIELD_NAME)
+    section: str
+    sub_section: Optional[str]
+    scan_date: str
+    scan_id: str
+    user_id: str
+    upload: str
+
+
+class StagingMetadata(BaseModel):
+    model_config = ConfigDict(populate_by_name=True)
+
+    nhs_number: str = Field(alias=NHS_NUMBER_FIELD_NAME)
+    files: list[MetadataFile]
diff --git a/lambdas/services/sqs_service.py b/lambdas/services/sqs_service.py
new file mode 100644
index 000000000..6662d0c7f
--- /dev/null
+++ b/lambdas/services/sqs_service.py
@@ -0,0 +1,24 @@
+import logging
+
+import boto3
+from botocore.client import Config as BotoConfig
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+
+class SQSService:
+    def __init__(self):
+        config = BotoConfig(retries={"max_attempts": 3, "mode": "standard"})
+        self.client = boto3.client("sqs", config=config)
+
+    def send_message_with_nhs_number_attr(
+        self, queue_url: str, message_body: str, nhs_number: str
+    ):
+        self.client.send_message(
+            QueueUrl=queue_url,
+            MessageAttributes={
+                "NhsNumber": {"DataType": "String", "StringValue": nhs_number},
+            },
+            MessageBody=message_body,
+        )
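
Reviewer note: to make the alias handling concrete, a sketch (assuming pydantic v2, which the `model_validate`/`ConfigDict` usage implies) of validating one row as `csv.DictReader` would yield it, then serialising the way the handler does. Because `nhs_number` is declared with `exclude=True` on `MetadataFile`, it appears once at the `StagingMetadata` level rather than on every file entry.

```python
from models.staging_metadata import MetadataFile, StagingMetadata

# One row from metadata.csv, keyed by the CSV headers.
row = {
    "FILEPATH": "/1234567890/1of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf",
    "PAGE COUNT": "",
    "GP-PRACTICE-CODE": "",
    "NHS-NO": "1234567890",
    "SECTION": "LG",
    "SUB-SECTION": "",
    "SCAN-DATE": "03/09/2022",
    "SCAN-ID": "NEC",
    "USER-ID": "NEC",
    "UPLOAD": "04/10/2023",
}

file_metadata = MetadataFile.model_validate(row)
staging = StagingMetadata(nhs_number=row["NHS-NO"], files=[file_metadata])

# Serialises with the hyphenated aliases, matching the expected SQS message
# fixtures further down in this diff.
print(staging.model_dump_json(by_alias=True))
```
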
"tests/unit/helpers/data/staging_metadata" + + +def test_lambda_send_metadata_to_sqs_queue(set_env, mocker, mock_sqs_service): + mocker.patch( + "handlers.bulk_upload_metadata_handler.download_metadata_from_s3", + return_value=MOCK_METADATA_CSV, + ) + + lambda_handler(None, None) + + assert mock_sqs_service.send_message_with_nhs_number_attr.call_count == 2 + + expected_calls = [ + call( + queue_url=MOCK_LG_METADATA_SQS_QUEUE, + message_body=EXPECTED_SQS_MSG_FOR_PATIENT_1234567890, + nhs_number="1234567890", + ), + call( + queue_url=MOCK_LG_METADATA_SQS_QUEUE, + message_body=EXPECTED_SQS_MSG_FOR_PATIENT_1234567891, + nhs_number="1234567891", + ), + ] + mock_sqs_service.send_message_with_nhs_number_attr.assert_has_calls(expected_calls) + + +def test_handler_log_error_when_fail_to_get_metadata_csv_from_s3( + set_env, mock_s3_service, mock_sqs_service, caplog +): + mock_s3_service.download_file.side_effect = ClientError( + {"Error": {"Code": "500", "Message": "file not exist in bucket"}}, + "s3_get_object", + ) + expected_err_msg = "An error occurred (500) when calling the s3_get_object operation: file not exist in bucket" + + lambda_handler(None, None) + + assert caplog.records[-1].message == expected_err_msg + assert caplog.records[-1].levelname == "ERROR" + + mock_sqs_service.send_message_with_nhs_number_attr.assert_not_called() + + +def test_handler_log_error_when_metadata_csv_is_invalid( + set_env, mocker, mock_sqs_service, caplog +): + mocker.patch( + "handlers.bulk_upload_metadata_handler.download_metadata_from_s3", + return_value=MOCK_INVALID_METADATA_CSV, + ) + + lambda_handler(None, None) + + assert "validation errors for MetadataFile" in caplog.records[-1].message + assert caplog.records[-1].levelname == "ERROR" + + mock_sqs_service.send_message_with_nhs_number_attr.assert_not_called() + + +def test_handler_log_error_when_failed_to_send_message_to_sqs( + set_env, mock_s3_service, mock_sqs_service, mock_tempfile, caplog +): + mock_sqs_service.send_message_with_nhs_number_attr.side_effect = ClientError( + { + "Error": { + "Code": "AWS.SimpleQueueService.NonExistentQueue", + "Message": "The specified queue does not exist", + } + }, + "SendMessage", + ) + expected_err_msg = ( + "An error occurred (AWS.SimpleQueueService.NonExistentQueue) when calling the SendMessage operation:" + " The specified queue does not exist" + ) + + lambda_handler(None, None) + + assert caplog.records[-1].message == expected_err_msg + assert caplog.records[-1].levelname == "ERROR" + + +def test_download_metadata_from_s3(mock_s3_service, mock_tempfile): + actual = download_metadata_from_s3( + staging_bucket_name=MOCK_LG_STAGING_STORE_BUCKET, + metadata_filename=METADATA_FILENAME, + ) + expected = MOCK_METADATA_CSV + + mock_s3_service.download_file.assert_called_with( + s3_bucket_name=MOCK_LG_STAGING_STORE_BUCKET, + file_key=METADATA_FILENAME, + download_path=f"{MOCK_TEMP_FOLDER}/{METADATA_FILENAME}", + ) + assert actual == expected + + +def test_download_metadata_from_s3_raise_error_when_failed_to_download( + mock_s3_service, mock_tempfile +): + mock_s3_service.download_file.side_effect = ClientError( + {"Error": {"Code": "500", "Message": "file not exist in bucket"}}, + "s3_get_object", + ) + + with pytest.raises(ClientError): + download_metadata_from_s3( + staging_bucket_name=MOCK_LG_STAGING_STORE_BUCKET, + metadata_filename=METADATA_FILENAME, + ) + + +def test_csv_to_staging_metadata(): + actual = csv_to_staging_metadata(MOCK_METADATA_CSV) + expected = EXPECTED_PARSED_METADATA + assert actual == expected + + 
+def test_csv_to_staging_metadata_raise_error_when_metadata_invalid():
+    with pytest.raises(ValidationError):
+        csv_to_staging_metadata(MOCK_INVALID_METADATA_CSV)
+
+
+def test_send_metadata_to_sqs(mock_sqs_service):
+    mock_parsed_metadata = EXPECTED_PARSED_METADATA
+    send_metadata_to_sqs(mock_parsed_metadata, MOCK_LG_METADATA_SQS_QUEUE)
+
+    assert mock_sqs_service.send_message_with_nhs_number_attr.call_count == 2
+
+    expected_calls = [
+        call(
+            queue_url=MOCK_LG_METADATA_SQS_QUEUE,
+            message_body=EXPECTED_SQS_MSG_FOR_PATIENT_1234567890,
+            nhs_number="1234567890",
+        ),
+        call(
+            queue_url=MOCK_LG_METADATA_SQS_QUEUE,
+            message_body=EXPECTED_SQS_MSG_FOR_PATIENT_1234567891,
+            nhs_number="1234567891",
+        ),
+    ]
+    mock_sqs_service.send_message_with_nhs_number_attr.assert_has_calls(expected_calls)
+
+
+def test_send_metadata_to_sqs_raise_error_when_fail_to_send_message(mock_sqs_service):
+    mock_sqs_service.send_message_with_nhs_number_attr.side_effect = ClientError(
+        {
+            "Error": {
+                "Code": "AWS.SimpleQueueService.NonExistentQueue",
+                "Message": "The specified queue does not exist",
+            }
+        },
+        "SendMessage",
+    )
+
+    with pytest.raises(ClientError):
+        send_metadata_to_sqs(EXPECTED_PARSED_METADATA, MOCK_LG_METADATA_SQS_QUEUE)
+
+
+@pytest.fixture
+def mock_s3_service(mocker):
+    patched_instance = mocker.patch(
+        "handlers.bulk_upload_metadata_handler.S3Service"
+    ).return_value
+    yield patched_instance
+
+
+@pytest.fixture
+def mock_tempfile(mocker):
+    mocker.patch.object(tempfile, "mkdtemp", return_value=MOCK_TEMP_FOLDER)
+    yield
+
+
+@pytest.fixture
+def mock_sqs_service(mocker):
+    patched_instance = mocker.patch(
+        "handlers.bulk_upload_metadata_handler.SQSService"
+    ).return_value
+    yield patched_instance
diff --git a/lambdas/tests/unit/helpers/data/staging_metadata/__init__.py b/lambdas/tests/unit/helpers/data/staging_metadata/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/lambdas/tests/unit/helpers/data/staging_metadata/expect_sqs_msg_for_patient_1234567890.json b/lambdas/tests/unit/helpers/data/staging_metadata/expect_sqs_msg_for_patient_1234567890.json
new file mode 100644
index 000000000..2fe3d9ecc
--- /dev/null
+++ b/lambdas/tests/unit/helpers/data/staging_metadata/expect_sqs_msg_for_patient_1234567890.json
@@ -0,0 +1 @@
+{"NHS-NO":"1234567890","files":[{"FILEPATH":"/1234567890/1of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf","PAGE COUNT":"","GP-PRACTICE-CODE":"","SECTION":"LG","SUB-SECTION":"","SCAN-DATE":"03/09/2022","SCAN-ID":"NEC","USER-ID":"NEC","UPLOAD":"04/10/2023"},{"FILEPATH":"/1234567890/2of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf","PAGE COUNT":"","GP-PRACTICE-CODE":"","SECTION":"LG","SUB-SECTION":"","SCAN-DATE":"03/09/2022","SCAN-ID":"NEC","USER-ID":"NEC","UPLOAD":"04/10/2023"}]}
\ No newline at end of file
diff --git a/lambdas/tests/unit/helpers/data/staging_metadata/expect_sqs_msg_for_patient_1234567891.json b/lambdas/tests/unit/helpers/data/staging_metadata/expect_sqs_msg_for_patient_1234567891.json
new file mode 100644
index 000000000..509bf00e5
--- /dev/null
+++ b/lambdas/tests/unit/helpers/data/staging_metadata/expect_sqs_msg_for_patient_1234567891.json
@@ -0,0 +1 @@
+{"NHS-NO":"1234567891","files":[{"FILEPATH":"1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[1234567891]_[25-12-2019].txt","PAGE COUNT":"","GP-PRACTICE-CODE":"","SECTION":"LG","SUB-SECTION":"","SCAN-DATE":"04/09/2022","SCAN-ID":"NEC","USER-ID":"NEC","UPLOAD":"04/10/2023"}]}
\ No newline at end of file
diff --git a/lambdas/tests/unit/helpers/data/staging_metadata/expected_data.py b/lambdas/tests/unit/helpers/data/staging_metadata/expected_data.py
new file mode 100644
index 000000000..116621c2e
--- /dev/null
+++ b/lambdas/tests/unit/helpers/data/staging_metadata/expected_data.py
@@ -0,0 +1,64 @@
+import os
+
+from models.staging_metadata import MetadataFile, StagingMetadata
+
+patient_1_file_1 = MetadataFile(
+    file_path="/1234567890/1of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf",
+    page_count="",
+    gp_practice_code="",
+    nhs_number="1234567890",
+    section="LG",
+    sub_section="",
+    scan_date="03/09/2022",
+    scan_id="NEC",
+    user_id="NEC",
+    upload="04/10/2023",
+)
+patient_1_file_2 = MetadataFile(
+    file_path="/1234567890/2of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf",
+    page_count="",
+    gp_practice_code="",
+    nhs_number="1234567890",
+    section="LG",
+    sub_section="",
+    scan_date="03/09/2022",
+    scan_id="NEC",
+    user_id="NEC",
+    upload="04/10/2023",
+)
+
+patient_1 = StagingMetadata(
+    nhs_number="1234567890", files=[patient_1_file_1, patient_1_file_2]
+)
+
+patient_2_file_1 = MetadataFile(
+    file_path="1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[1234567891]_[25-12-2019].txt",
+    page_count="",
+    nhs_number="1234567891",
+    gp_practice_code="",
+    section="LG",
+    sub_section="",
+    scan_date="04/09/2022",
+    scan_id="NEC",
+    user_id="NEC",
+    upload="04/10/2023",
+)
+
+patient_2 = StagingMetadata(nhs_number="1234567891", files=[patient_2_file_1])
+
+EXPECTED_PARSED_METADATA = [patient_1, patient_2]
+
+
+def readfile(filename: str) -> str:
+    filepath = os.path.join(os.path.dirname(__file__), filename)
+    with open(filepath, "r") as file:
+        file_content = file.read()
+    return file_content
+
+
+EXPECTED_SQS_MSG_FOR_PATIENT_1234567890 = readfile(
+    "expect_sqs_msg_for_patient_1234567890.json"
+)
+EXPECTED_SQS_MSG_FOR_PATIENT_1234567891 = readfile(
+    "expect_sqs_msg_for_patient_1234567891.json"
+)
diff --git a/lambdas/tests/unit/helpers/data/staging_metadata/metadata.csv b/lambdas/tests/unit/helpers/data/staging_metadata/metadata.csv
new file mode 100644
index 000000000..84159d41f
--- /dev/null
+++ b/lambdas/tests/unit/helpers/data/staging_metadata/metadata.csv
@@ -0,0 +1,4 @@
+FILEPATH,PAGE COUNT,GP-PRACTICE-CODE,NHS-NO,SECTION,SUB-SECTION,SCAN-DATE,SCAN-ID,USER-ID,UPLOAD
+/1234567890/1of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf,,,1234567890,LG,,03/09/2022,NEC,NEC,04/10/2023
+/1234567890/2of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf,,,1234567890,LG,,03/09/2022,NEC,NEC,04/10/2023
+1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[1234567891]_[25-12-2019].txt,,,1234567891,LG,,04/09/2022,NEC,NEC,04/10/2023
diff --git a/lambdas/tests/unit/helpers/data/staging_metadata/metadata_invalid.csv b/lambdas/tests/unit/helpers/data/staging_metadata/metadata_invalid.csv
new file mode 100644
index 000000000..ee3a9372d
--- /dev/null
+++ b/lambdas/tests/unit/helpers/data/staging_metadata/metadata_invalid.csv
@@ -0,0 +1,3 @@
+file_h,page_count,gp_practice_coxcan_date0]_[25-12-2019].pdf,,,LG,,03/09/2022,NEC,NEC,04/10/2023
+1of1_Lloyd_George_Record_[Joexalid]_[1234567891]_[25-12-2019].txt,,,LG,,04/09/2022,NEC,NEC,04/10/2023
+apple,orange,banana,kiwi,papaya,012345789
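
Reviewer note: metadata_invalid.csv is deliberately malformed. A short sketch of the failure mode it exercises; the row below is a hypothetical fragment, but the error format is standard pydantic v2.

```python
import pydantic

from models.staging_metadata import MetadataFile

# A broken row as csv.DictReader might yield it: most required headers absent.
bad_row = {"FILEPATH": "apple", "PAGE COUNT": "orange"}

try:
    MetadataFile.model_validate(bad_row)
except pydantic.ValidationError as error:
    # The message begins "N validation errors for MetadataFile", which is the
    # substring the handler test asserts on via caplog.
    print(error)
```
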
diff --git a/lambdas/tests/unit/services/test_sqs_service.py b/lambdas/tests/unit/services/test_sqs_service.py
new file mode 100644
index 000000000..2f844a384
--- /dev/null
+++ b/lambdas/tests/unit/services/test_sqs_service.py
@@ -0,0 +1,34 @@
+import json
+
+from services.sqs_service import SQSService
+from tests.unit.conftest import MOCK_LG_METADATA_SQS_QUEUE, TEST_NHS_NUMBER
+
+
+def test_send_message_with_nhs_number_attr(set_env, mocker):
+    mocked_sqs_client = mocker.MagicMock()
+
+    def return_mock(service_name, **_kwargs):
+        if service_name == "sqs":
+            return mocked_sqs_client
+
+    mocker.patch("boto3.client", side_effect=return_mock)
+
+    service = SQSService()
+
+    test_message_body = json.dumps(
+        {"NHS-NO": "1234567890", "files": ["file1.pdf", "file2.pdf"]}
+    )
+
+    service.send_message_with_nhs_number_attr(
+        queue_url=MOCK_LG_METADATA_SQS_QUEUE,
+        message_body=test_message_body,
+        nhs_number=TEST_NHS_NUMBER,
+    )
+
+    mocked_sqs_client.send_message.assert_called_with(
+        QueueUrl=MOCK_LG_METADATA_SQS_QUEUE,
+        MessageAttributes={
+            "NhsNumber": {"DataType": "String", "StringValue": TEST_NHS_NUMBER},
+        },
+        MessageBody=test_message_body,
+    )
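
Reviewer note: nothing in this diff consumes the queue, but for context, a downstream reader could filter on the NhsNumber attribute the service attaches. A hypothetical sketch using only standard boto3 calls; the queue URL is an assumption.

```python
import boto3

sqs = boto3.client("sqs")
response = sqs.receive_message(
    QueueUrl="https://sqs.example/metadata-queue",  # assumption
    MaxNumberOfMessages=1,
    MessageAttributeNames=["NhsNumber"],  # request the attribute set by SQSService
)
for message in response.get("Messages", []):
    nhs_number = message["MessageAttributes"]["NhsNumber"]["StringValue"]
    print(nhs_number, message["Body"])
```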