From c576556cd29f427083fc458ba6063887862b98b0 Mon Sep 17 00:00:00 2001 From: NogaNHS Date: Thu, 19 Oct 2023 17:14:30 +0100 Subject: [PATCH 01/46] prmdr-370 initial commit --- lambdas/models/pds_models.py | 24 ++++++++++++++++++++++-- lambdas/utils/lloyd_george_validator.py | 12 ++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/lambdas/models/pds_models.py b/lambdas/models/pds_models.py index bd6b481c8..7b3d1e0e8 100644 --- a/lambdas/models/pds_models.py +++ b/lambdas/models/pds_models.py @@ -28,11 +28,19 @@ class Security(BaseModel): code: str display: str - class Meta(BaseModel): versionId: str security: list[Security] +class GPIdentifier(BaseModel): + system: Optional[str] + value: str + period: Optional[Period] + +class GeneralPractitioner(BaseModel): + id: Optional[str] + type: Optional[str] + identifier: GPIdentifier class PatientDetails(BaseModel): model_config = conf @@ -44,7 +52,7 @@ class PatientDetails(BaseModel): nhs_number: str superseded: bool restricted: bool - + general_practice = GeneralPractitioner class Patient(BaseModel): model_config = conf @@ -91,3 +99,15 @@ def get_patient_details(self, nhs_number) -> PatientDetails: superseded=bool(nhs_number == id), restricted=not self.is_unrestricted(), ) + + def get_minimum_patient_details(self, nhs_number) -> PatientDetails: + return PatientDetails( + givenName=self.get_current_usual_name().given, + familyName=self.get_current_usual_name().family, + birthDate=self.birth_date + if self.is_unrestricted() + else "", + nhsNumber=self.id, + superseded=bool(nhs_number == id), + restricted=not self.is_unrestricted(), + ) diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 0133343e5..269a91384 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -2,6 +2,8 @@ from typing import Optional from models.nhs_document_reference import NHSDocumentReference +from services.pds_api_service import PdsApiService +from services.ssm_service import SSMService class LGInvalidFilesException(Exception): @@ -87,3 +89,13 @@ def check_for_file_names_agrees_with_each_other(file_name_list: list[str]): ] if len(set(expected_common_part)) != 1: raise LGInvalidFilesException("File names does not match with each other") + +def validate_nhs_number(file_name_list: list[str], nhs_number: str): + # Check file names match with the nhs number in metadata.csv + file_name_info = extract_info_from_filename(file_name_list[0]) + if file_name_info["nhs_number"] != nhs_number: + raise LGInvalidFilesException( + "NHS number in file names does not match the given NHS number" + ) + pds_service = PdsApiService(SSMService()) + pds_service.fetch_patient_details(nhs_number=nhs_number) \ No newline at end of file From ae1933ad09cc87c581e0e01c7cac32be0d174cf5 Mon Sep 17 00:00:00 2001 From: NogaNHS Date: Fri, 20 Oct 2023 17:43:10 +0100 Subject: [PATCH 02/46] prmdr-370 added checks to validtors --- lambdas/enums/pds_ssm_parameters.py | 1 + lambdas/models/pds_models.py | 26 +++++++++++++------ lambdas/utils/lloyd_george_validator.py | 33 ++++++++++++++++++++++--- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/lambdas/enums/pds_ssm_parameters.py b/lambdas/enums/pds_ssm_parameters.py index 834c9b06a..f2db9203d 100644 --- a/lambdas/enums/pds_ssm_parameters.py +++ b/lambdas/enums/pds_ssm_parameters.py @@ -8,3 +8,4 @@ class SSMParameter(Enum): NHS_OAUTH_ENDPOINT = "/prs/dev/user-input/nhs-oauth-endpoint" PDS_API_ENDPOINT = "/prs/dev/user-input/pds-fhir-endpoint" PDS_API_ACCESS_TOKEN = "/prs/dev-ndr/pds-fhir-access-token" + GP_ODS_CODE = "/ndr/GP_ODS_code" diff --git a/lambdas/models/pds_models.py b/lambdas/models/pds_models.py index 7b3d1e0e8..03b889181 100644 --- a/lambdas/models/pds_models.py +++ b/lambdas/models/pds_models.py @@ -1,3 +1,5 @@ +import datetime +from datetime import date from typing import Optional from models.config import conf @@ -5,8 +7,8 @@ class Period(BaseModel): - start: str - end: Optional[str] = "" + start: date = None + end: Optional[date] = None class Address(BaseModel): @@ -47,26 +49,28 @@ class PatientDetails(BaseModel): given_Name: Optional[list[str]] = [] family_name: Optional[str] = "" - birth_date: Optional[str] = "" + birth_date: Optional[date] = None postal_code: Optional[str] = "" nhs_number: str superseded: bool restricted: bool - general_practice = GeneralPractitioner + general_practice_ods = Optional[str] class Patient(BaseModel): model_config = conf id: str - birth_date: str + birth_date: date address: Optional[list[Address]] = [] name: list[Name] meta: Meta + general_practitioner = Optional[list[GeneralPractitioner]] + def get_security(self) -> Security: security = self.meta.security[0] if self.meta.security[0] else None if not security: - raise ValidationError("No valid security found in patient meta") + raise ValueError("No valid security found in patient meta") return security @@ -87,6 +91,13 @@ def get_current_home_address(self) -> Optional[Address]: if entry.use.lower() == "home": return entry + def get_ods_code_for_gp(self) -> str: + for entry in self.general_practice: + gp_end_date = entry.identifier.period.end + if not gp_end_date or gp_end_date > date.today(): + return entry.identifier.value + raise ValueError('No active GP practice for the patient') + def get_patient_details(self, nhs_number) -> PatientDetails: return PatientDetails( givenName=self.get_current_usual_name().given, @@ -104,7 +115,8 @@ def get_minimum_patient_details(self, nhs_number) -> PatientDetails: return PatientDetails( givenName=self.get_current_usual_name().given, familyName=self.get_current_usual_name().family, - birthDate=self.birth_date + birthDate=self.birth_date, + generalPracticeOds=self.get_ods_code_for_gp() if self.is_unrestricted() else "", nhsNumber=self.id, diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 269a91384..17325e0be 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -1,7 +1,14 @@ +import datetime import re from typing import Optional +from botocore.exceptions import ClientError +from pydantic import ValidationError +from requests import HTTPError + +from enums.pds_ssm_parameters import SSMParameter from models.nhs_document_reference import NHSDocumentReference +from models.pds_models import Patient from services.pds_api_service import PdsApiService from services.ssm_service import SSMService @@ -90,12 +97,32 @@ def check_for_file_names_agrees_with_each_other(file_name_list: list[str]): if len(set(expected_common_part)) != 1: raise LGInvalidFilesException("File names does not match with each other") -def validate_nhs_number(file_name_list: list[str], nhs_number: str): +def validate_nhs_number(file_name_list: list[str], nhs_number: str, patient_name: str, date_of_birth:str): # Check file names match with the nhs number in metadata.csv file_name_info = extract_info_from_filename(file_name_list[0]) if file_name_info["nhs_number"] != nhs_number: raise LGInvalidFilesException( "NHS number in file names does not match the given NHS number" ) - pds_service = PdsApiService(SSMService()) - pds_service.fetch_patient_details(nhs_number=nhs_number) \ No newline at end of file + try: + pds_service = PdsApiService(SSMService()) + pds_response = pds_service.pds_request(nsh_number=nhs_number, retry_on_expired=True) + pds_response.raise_for_status() + patient = Patient.model_validate(pds_response.json()) + patient_details = patient.get_minimum_patient_details(nhs_number) + current_user_ods = get_user_ods_code() + if patient_details.general_practice_ods != current_user_ods: + raise ValidationError("User is not allowed to access patient") + date_of_birth = datetime.datetime.strptime(date_of_birth, '%d/%m/%Y') + if patient_details.birth_date != date_of_birth: + raise ValidationError("Patient DoB does not match our records") + patient_full_name = patient_details.given_Name + patient_details.family_name + if patient_full_name != patient_name: + raise ValidationError("Patient name does not match our records") + + except (HTTPError, ValidationError, ClientError, ValueError) as e: + pass + +def get_user_ods_code(): + ssm_servie = SSMService() + return ssm_servie.get_ssm_parameter(SSMParameter.GP_ODS_CODE.value) \ No newline at end of file From f061ebf30c5638ed38ac67c3a1999a03eb823e5b Mon Sep 17 00:00:00 2001 From: NogaNHS Date: Mon, 23 Oct 2023 09:26:10 +0100 Subject: [PATCH 03/46] prmdr-370 fix pydantic syntax --- lambdas/models/pds_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/models/pds_models.py b/lambdas/models/pds_models.py index 03b889181..1e16f7830 100644 --- a/lambdas/models/pds_models.py +++ b/lambdas/models/pds_models.py @@ -54,7 +54,7 @@ class PatientDetails(BaseModel): nhs_number: str superseded: bool restricted: bool - general_practice_ods = Optional[str] + general_practice_ods: Optional[str] = "" class Patient(BaseModel): model_config = conf @@ -64,7 +64,7 @@ class Patient(BaseModel): address: Optional[list[Address]] = [] name: list[Name] meta: Meta - general_practitioner = Optional[list[GeneralPractitioner]] + general_practitioner: Optional[list[GeneralPractitioner]] = [] def get_security(self) -> Security: From e7ae72996c9d1f2d47941f70709e1b5d18941946 Mon Sep 17 00:00:00 2001 From: NogaNHS Date: Mon, 23 Oct 2023 09:39:27 +0100 Subject: [PATCH 04/46] fix unit test --- .../tests/unit/handlers/test_search_patient_details_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lambdas/tests/unit/handlers/test_search_patient_details_handler.py b/lambdas/tests/unit/handlers/test_search_patient_details_handler.py index 13223469f..1d6a73192 100644 --- a/lambdas/tests/unit/handlers/test_search_patient_details_handler.py +++ b/lambdas/tests/unit/handlers/test_search_patient_details_handler.py @@ -34,7 +34,7 @@ def test_lambda_handler_valid_id_returns_200( expected = { "body": '{"givenName":["Jane"],"familyName":"Smith","birthDate":"2010-10-22",' '"postalCode":"LS1 6AE","nhsNumber":"9000000009","superseded":false,' - '"restricted":false}', + '"restricted":false,"generalPracticeOds":""}', "headers": { "Access-Control-Allow-Methods": "GET", "Access-Control-Allow-Origin": "*", From f57cd1accb900c28b2eefd64995e9b0b4162deb8 Mon Sep 17 00:00:00 2001 From: NogaNHS Date: Mon, 23 Oct 2023 12:49:24 +0100 Subject: [PATCH 05/46] small fixes --- lambdas/models/pds_models.py | 2 +- lambdas/utils/lloyd_george_validator.py | 23 +++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/lambdas/models/pds_models.py b/lambdas/models/pds_models.py index 1e16f7830..5107a2e8b 100644 --- a/lambdas/models/pds_models.py +++ b/lambdas/models/pds_models.py @@ -92,7 +92,7 @@ def get_current_home_address(self) -> Optional[Address]: return entry def get_ods_code_for_gp(self) -> str: - for entry in self.general_practice: + for entry in self.general_practitioner: gp_end_date = entry.identifier.period.end if not gp_end_date or gp_end_date > date.today(): return entry.identifier.value diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 17325e0be..b9f298877 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -1,4 +1,5 @@ import datetime +import logging import re from typing import Optional @@ -12,6 +13,8 @@ from services.pds_api_service import PdsApiService from services.ssm_service import SSMService +logger = logging.getLogger() +logger.setLevel(logging.INFO) class LGInvalidFilesException(Exception): pass @@ -63,13 +66,10 @@ def validate_lg_file_names(file_name_list: list[str], nhs_number: Optional[str] check_for_duplicate_files(file_name_list) check_for_file_names_agrees_with_each_other(file_name_list) + if nhs_number: # Check file names match with the nhs number in metadata.csv - file_name_info = extract_info_from_filename(file_name_list[0]) - if file_name_info["nhs_number"] != nhs_number: - raise LGInvalidFilesException( - "NHS number in file names does not match the given NHS number" - ) + validate_nhs_number(file_name_list, nhs_number) def extract_info_from_filename(filename: str) -> dict: @@ -97,13 +97,15 @@ def check_for_file_names_agrees_with_each_other(file_name_list: list[str]): if len(set(expected_common_part)) != 1: raise LGInvalidFilesException("File names does not match with each other") -def validate_nhs_number(file_name_list: list[str], nhs_number: str, patient_name: str, date_of_birth:str): +def validate_nhs_number(file_name_list: list[str], nhs_number: str): # Check file names match with the nhs number in metadata.csv file_name_info = extract_info_from_filename(file_name_list[0]) if file_name_info["nhs_number"] != nhs_number: raise LGInvalidFilesException( "NHS number in file names does not match the given NHS number" ) + patient_name = file_name_info["patient_name"] + date_of_birth = file_name_info["date_of_birth"] try: pds_service = PdsApiService(SSMService()) pds_response = pds_service.pds_request(nsh_number=nhs_number, retry_on_expired=True) @@ -112,16 +114,17 @@ def validate_nhs_number(file_name_list: list[str], nhs_number: str, patient_name patient_details = patient.get_minimum_patient_details(nhs_number) current_user_ods = get_user_ods_code() if patient_details.general_practice_ods != current_user_ods: - raise ValidationError("User is not allowed to access patient") + raise LGInvalidFilesException("User is not allowed to access patient") date_of_birth = datetime.datetime.strptime(date_of_birth, '%d/%m/%Y') if patient_details.birth_date != date_of_birth: - raise ValidationError("Patient DoB does not match our records") + raise LGInvalidFilesException("Patient DoB does not match our records") patient_full_name = patient_details.given_Name + patient_details.family_name if patient_full_name != patient_name: - raise ValidationError("Patient name does not match our records") + raise LGInvalidFilesException("Patient name does not match our records") except (HTTPError, ValidationError, ClientError, ValueError) as e: - pass + logger.error(e) + raise LGInvalidFilesException(e) def get_user_ods_code(): ssm_servie = SSMService() From 86553d679fb42a4ab04370762ac026b95050e886 Mon Sep 17 00:00:00 2001 From: NogaNHS Date: Mon, 23 Oct 2023 17:27:26 +0100 Subject: [PATCH 06/46] add unit tests --- .../helpers/data/pds/pds_patient_response.py | 2 +- lambdas/tests/unit/helpers/mock_services.py | 16 ++++++++++ .../unit/services/test_bulk_upload_service.py | 7 +---- .../unit/services/test_pds_api_service.py | 13 ++------ .../unit/utils/test_lloyd_george_validator.py | 24 +++++++++++++-- lambdas/utils/lloyd_george_validator.py | 30 ++++++++++--------- 6 files changed, 58 insertions(+), 34 deletions(-) create mode 100644 lambdas/tests/unit/helpers/mock_services.py diff --git a/lambdas/tests/unit/helpers/data/pds/pds_patient_response.py b/lambdas/tests/unit/helpers/data/pds/pds_patient_response.py index 3d92888f2..641f03915 100644 --- a/lambdas/tests/unit/helpers/data/pds/pds_patient_response.py +++ b/lambdas/tests/unit/helpers/data/pds/pds_patient_response.py @@ -63,7 +63,7 @@ "identifier": { "system": "https://fhir.nhs.uk/Id/ods-organization-code", "value": "Y12345", - "period": {"start": "2020-01-01", "end": "2021-12-31"}, + "period": {"start": "2020-01-01"}, }, } ], diff --git a/lambdas/tests/unit/helpers/mock_services.py b/lambdas/tests/unit/helpers/mock_services.py new file mode 100644 index 000000000..696bcff1b --- /dev/null +++ b/lambdas/tests/unit/helpers/mock_services.py @@ -0,0 +1,16 @@ +class FakeSSMService: + def __init__(self, *arg, **kwargs): + pass + + def get_ssm_parameters(self, parameters_keys, *arg, **kwargs): + return {parameter: f"test_value_{parameter}" for parameter in parameters_keys} + + def update_ssm_parameter(self, *arg, **kwargs): + pass + +class FakePDSService: + def __init__(self, *arg, **kwargs): + pass + + def pds_request(self, *arg, **kwargs): + pass \ No newline at end of file diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index c8cc0650e..350e8a424 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -41,6 +41,7 @@ def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_t service = BulkUploadService() service.s3_service = mocker.MagicMock() service.dynamo_service = mocker.MagicMock() + service.validate_files = mocker.MagicMock() mock_client_error = ClientError( {"Error": {"Code": "404", "Message": "Object not found in bucket"}}, @@ -77,12 +78,6 @@ def test_validate_files_raise_LGInvalidFilesException_when_file_names_invalid(se service.validate_files(TEST_STAGING_METADATA_WITH_INVALID_FILENAME) -def test_validate_files_does_not_raise_error_when_file_names_valid(set_env): - service = BulkUploadService() - - assert service.validate_files(TEST_STAGING_METADATA) is None - - def test_create_lg_records_and_copy_files(set_env, mocker, mock_uuid): service = BulkUploadService() service.s3_service = mocker.MagicMock() diff --git a/lambdas/tests/unit/services/test_pds_api_service.py b/lambdas/tests/unit/services/test_pds_api_service.py index 3b3337289..370d45e5b 100644 --- a/lambdas/tests/unit/services/test_pds_api_service.py +++ b/lambdas/tests/unit/services/test_pds_api_service.py @@ -4,23 +4,14 @@ from botocore.exceptions import ClientError from enums.pds_ssm_parameters import SSMParameter from requests import Response +from tests.unit.helpers.mock_services import FakeSSMService + from services.pds_api_service import PdsApiService from tests.unit.helpers.data.pds.access_token_response import RESPONSE_TOKEN from tests.unit.helpers.data.pds.pds_patient_response import PDS_PATIENT from utils.exceptions import PdsErrorException -class FakeSSMService: - def __init__(self, *arg, **kwargs): - pass - - def get_ssm_parameters(self, parameters_keys, *arg, **kwargs): - return {parameter: f"test_value_{parameter}" for parameter in parameters_keys} - - def update_ssm_parameter(self, *arg, **kwargs): - pass - - fake_ssm_service = FakeSSMService() pds_service = PdsApiService(fake_ssm_service) diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index 0b55e32e2..bcf0c8406 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -1,10 +1,14 @@ +import json + import pytest +from requests import Response + +from tests.unit.helpers.data.pds.pds_patient_response import PDS_PATIENT from utils.lloyd_george_validator import ( LGInvalidFilesException, check_for_duplicate_files, check_for_file_names_agrees_with_each_other, check_for_number_of_files_match_expected, extract_info_from_filename, - validate_file_name, validate_lg_file_type) - + validate_file_name, validate_lg_file_type, validate_with_pds_service) def test_catching_error_when_file_type_not_pdf(): with pytest.raises(LGInvalidFilesException): @@ -117,3 +121,19 @@ def test_files_for_different_patients(): with pytest.raises(LGInvalidFilesException) as e: check_for_file_names_agrees_with_each_other(lg_file_list) assert e == LGInvalidFilesException("File names does not match with each other") + +def test_validate_nhs_id_with_pds_service(mocker): + response = Response() + response.status_code = 200 + response._content = json.dumps(PDS_PATIENT).encode("utf-8") + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", + ] + mocker.patch( + "services.pds_api_service.PdsApiService.pds_request", + return_value=response, + ) + mocker.patch("utils.lloyd_george_validator.get_user_ods_code", return_value='Y12345') + + validate_with_pds_service(lg_file_list, "9000000009") diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index b9f298877..dcd8de06a 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -69,7 +69,7 @@ def validate_lg_file_names(file_name_list: list[str], nhs_number: Optional[str] if nhs_number: # Check file names match with the nhs number in metadata.csv - validate_nhs_number(file_name_list, nhs_number) + validate_with_pds_service(file_name_list, nhs_number,) def extract_info_from_filename(filename: str) -> dict: @@ -97,28 +97,30 @@ def check_for_file_names_agrees_with_each_other(file_name_list: list[str]): if len(set(expected_common_part)) != 1: raise LGInvalidFilesException("File names does not match with each other") -def validate_nhs_number(file_name_list: list[str], nhs_number: str): - # Check file names match with the nhs number in metadata.csv - file_name_info = extract_info_from_filename(file_name_list[0]) - if file_name_info["nhs_number"] != nhs_number: - raise LGInvalidFilesException( - "NHS number in file names does not match the given NHS number" - ) - patient_name = file_name_info["patient_name"] - date_of_birth = file_name_info["date_of_birth"] +def validate_with_pds_service(file_name_list: list[str], nhs_number: str): try: + # Check file names match with the nhs number in metadata.csv + file_name_info = extract_info_from_filename(file_name_list[0]) + if file_name_info["nhs_number"] != nhs_number: + raise LGInvalidFilesException( + "NHS number in file names does not match the given NHS number" + ) + patient_name = file_name_info["patient_name"] + date_of_birth = file_name_info["date_of_birth"] + pds_service = PdsApiService(SSMService()) pds_response = pds_service.pds_request(nsh_number=nhs_number, retry_on_expired=True) pds_response.raise_for_status() patient = Patient.model_validate(pds_response.json()) patient_details = patient.get_minimum_patient_details(nhs_number) current_user_ods = get_user_ods_code() + if patient_details.general_practice_ods != current_user_ods: raise LGInvalidFilesException("User is not allowed to access patient") - date_of_birth = datetime.datetime.strptime(date_of_birth, '%d/%m/%Y') + date_of_birth = datetime.datetime.strptime(date_of_birth, '%d-%m-%Y').date() if patient_details.birth_date != date_of_birth: raise LGInvalidFilesException("Patient DoB does not match our records") - patient_full_name = patient_details.given_Name + patient_details.family_name + patient_full_name =' '.join([name for name in patient_details.given_Name]) + ' ' + patient_details.family_name if patient_full_name != patient_name: raise LGInvalidFilesException("Patient name does not match our records") @@ -127,5 +129,5 @@ def validate_nhs_number(file_name_list: list[str], nhs_number: str): raise LGInvalidFilesException(e) def get_user_ods_code(): - ssm_servie = SSMService() - return ssm_servie.get_ssm_parameter(SSMParameter.GP_ODS_CODE.value) \ No newline at end of file + ssm_service = SSMService() + return ssm_service.get_ssm_parameter(SSMParameter.GP_ODS_CODE.value) \ No newline at end of file From 7ee1d8d8e048b8629e50fe9effe740661e805a87 Mon Sep 17 00:00:00 2001 From: NogaNHS Date: Tue, 24 Oct 2023 12:09:49 +0100 Subject: [PATCH 07/46] unit tests --- .../unit/utils/test_lloyd_george_validator.py | 150 +++++++++++++++++- lambdas/utils/lloyd_george_validator.py | 6 +- 2 files changed, 149 insertions(+), 7 deletions(-) diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index bcf0c8406..e11ca2d34 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -1,6 +1,7 @@ import json import pytest +from botocore.exceptions import ClientError from requests import Response from tests.unit.helpers.data.pds.pds_patient_response import PDS_PATIENT @@ -77,7 +78,7 @@ def test_files_list_with_missing_files(): "2of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", ] check_for_number_of_files_match_expected( - lg_file_list[0], str(len(lg_file_list)) + lg_file_list[0], len(lg_file_list) ) @@ -88,7 +89,7 @@ def test_files_without_missing_files(): "2of2_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", ] check_for_number_of_files_match_expected( - lg_file_list[0], str(len(lg_file_list)) + lg_file_list[0],len(lg_file_list) ) except LGInvalidFilesException: assert False, "There are missing file(s) in the request" @@ -130,10 +131,151 @@ def test_validate_nhs_id_with_pds_service(mocker): "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", ] - mocker.patch( + mock_pds_call = mocker.patch( "services.pds_api_service.PdsApiService.pds_request", return_value=response, ) - mocker.patch("utils.lloyd_george_validator.get_user_ods_code", return_value='Y12345') + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code", return_value='Y12345') validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_odc_code.assert_called_once() + +def test_mismatch_nhs_id_no_pds_service(mocker): + with pytest.raises(LGInvalidFilesException): + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Smith]_[9000000005]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jane Smith]_[9000000005]_[22-10-2010].pdf", + ] + mock_pds_call = mocker.patch( + "services.pds_api_service.PdsApiService.pds_request" + ) + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code", return_value='Y12345') + + validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_not_called() + mock_odc_code.assert_not_called() + + +def test_mismatch_name_with_pds_service(mocker): + with pytest.raises(LGInvalidFilesException): + response = Response() + response.status_code = 200 + response._content = json.dumps(PDS_PATIENT).encode("utf-8") + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", + ] + mock_pds_call = mocker.patch( + "services.pds_api_service.PdsApiService.pds_request", + return_value=response, + ) + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") + + validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_odc_code.assert_not_called() + +def test_mismatch_ods_with_pds_service(mocker): + with pytest.raises(LGInvalidFilesException): + response = Response() + response.status_code = 200 + response._content = json.dumps(PDS_PATIENT).encode("utf-8") + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", + ] + mock_pds_call = mocker.patch( + "services.pds_api_service.PdsApiService.pds_request", + return_value=response, + ) + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code", return_value='H98765') + + validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_odc_code.assert_called_once() + +def test_mismatch_dob_with_pds_service(mocker): + with pytest.raises(LGInvalidFilesException): + response = Response() + response.status_code = 200 + response._content = json.dumps(PDS_PATIENT).encode("utf-8") + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[14-01-2000].pdf", + "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[14-01-2000].pdf", + ] + mock_pds_call = mocker.patch( + "services.pds_api_service.PdsApiService.pds_request", + return_value=response, + ) + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") + + validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_odc_code.assert_not_called() + +def test_patient_not_found_with_pds_service(mocker): + with pytest.raises(LGInvalidFilesException): + response = Response() + response.status_code = 404 + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", + ] + mock_pds_call = mocker.patch( + "services.pds_api_service.PdsApiService.pds_request", + return_value=response, + ) + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") + + validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_odc_code.assert_not_called() + +def test_bad_request_with_pds_service(mocker): + with pytest.raises(LGInvalidFilesException): + response = Response() + response.status_code = 400 + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", + ] + mock_pds_call = mocker.patch( + "services.pds_api_service.PdsApiService.pds_request", + return_value=response, + ) + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") + + validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_odc_code.assert_not_called() + +def test_raise_client_error_from_ssm_with_pds_service(mocker): + with pytest.raises(LGInvalidFilesException): + response = Response() + response.status_code = 200 + response._content = json.dumps(PDS_PATIENT).encode("utf-8") + mock_client_error = ClientError( + {"Error": {"Code": "500", "Message": "test error"}}, "testing" + ) + lg_file_list = [ + "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", + "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", + ] + mock_pds_call = mocker.patch( + "services.pds_api_service.PdsApiService.pds_request", + return_value=response, + ) + mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code", side_effect=mock_client_error) + + validate_with_pds_service(lg_file_list, "9000000009") + + mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_odc_code.assert_called_once() diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index dcd8de06a..ad5192763 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -113,16 +113,16 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): pds_response.raise_for_status() patient = Patient.model_validate(pds_response.json()) patient_details = patient.get_minimum_patient_details(nhs_number) - current_user_ods = get_user_ods_code() - if patient_details.general_practice_ods != current_user_ods: - raise LGInvalidFilesException("User is not allowed to access patient") date_of_birth = datetime.datetime.strptime(date_of_birth, '%d-%m-%Y').date() if patient_details.birth_date != date_of_birth: raise LGInvalidFilesException("Patient DoB does not match our records") patient_full_name =' '.join([name for name in patient_details.given_Name]) + ' ' + patient_details.family_name if patient_full_name != patient_name: raise LGInvalidFilesException("Patient name does not match our records") + current_user_ods = get_user_ods_code() + if patient_details.general_practice_ods != current_user_ods: + raise LGInvalidFilesException("User is not allowed to access patient") except (HTTPError, ValidationError, ClientError, ValueError) as e: logger.error(e) From 1ab7ee5d3217f2ea17ef799d6259a431cdede07a Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 24 Oct 2023 13:25:26 +0100 Subject: [PATCH 08/46] [PRMDR-372] Add model for bulk upload status, add test for parsing records --- lambdas/models/bulk_upload_status.py | 27 ++++++++++ lambdas/services/bulk_upload_service.py | 13 +++++ lambdas/tests/unit/conftest.py | 4 ++ .../unit/models/test_bulk_upload_status.py | 49 +++++++++++++++++++ 4 files changed, 93 insertions(+) create mode 100644 lambdas/models/bulk_upload_status.py create mode 100644 lambdas/tests/unit/models/test_bulk_upload_status.py diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py new file mode 100644 index 000000000..9789d057a --- /dev/null +++ b/lambdas/models/bulk_upload_status.py @@ -0,0 +1,27 @@ +from typing import Annotated, Literal, Union + +from pydantic import BaseModel, Field, TypeAdapter + + +class SuccessfulUpload(BaseModel): + id: str = Field(alias="ID") + nhs_number: str + timestamp: str + upload_status: Literal["complete"] + file_location_in_bucket: str + + +class FailedUpload(BaseModel): + id: str = Field(alias="ID") + nhs_number: str + timestamp: str + upload_status: Literal["failed"] + failure_reason: str + file_path_of_failed_file: str + + +UploadStatus = TypeAdapter( + Annotated[ + Union[SuccessfulUpload, FailedUpload], Field(..., discriminator="upload_status") + ] +) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index be69e5092..2bfac44de 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -4,6 +4,7 @@ import pydantic from botocore.exceptions import ClientError from enums.metadata_field_names import DocumentReferenceMetadataFields +from models.bulk_upload_status import BulkUploadStatus from models.nhs_document_reference import NHSDocumentReference from models.staging_metadata import MetadataFile, StagingMetadata from services.dynamo_service import DynamoDBService @@ -29,6 +30,7 @@ def __init__( self.staging_bucket_name = os.environ["STAGING_STORE_BUCKET_NAME"] self.lg_bucket_name = os.environ["LLOYD_GEORGE_BUCKET_NAME"] self.lg_dynamo_table = os.environ["LLOYD_GEORGE_DYNAMODB_NAME"] + self.bulk_upload_report_dynamo_table = os.environ["BULK_UPLOAD_DYNAMODB"] self.invalid_queue_url = os.environ["INVALID_SQS_QUEUE_URL"] self.pdf_content_type = "application/pdf" @@ -147,6 +149,17 @@ def rollback_transaction(self): f"Failed to rollback the incomplete transaction due to error: {e}" ) + def report_failure(self, nhs_number: str, reason: str, file_path: str): + record = BulkUploadStatus( + nhs_number=nhs_number, + failed_reason=reason, + file_path_of_failed_file=file_path, + ) + self.dynamo_service.create_item( + table_name=self.bulk_upload_report_dynamo_table, item=record.model_dump() + ) + pass + @staticmethod def strip_leading_slash(filepath: str) -> str: # Handle the filepaths irregularity in the given example of metadata.csv, diff --git a/lambdas/tests/unit/conftest.py b/lambdas/tests/unit/conftest.py index 8dc8ab2ff..7e7e2d97b 100644 --- a/lambdas/tests/unit/conftest.py +++ b/lambdas/tests/unit/conftest.py @@ -20,6 +20,8 @@ MOCK_LG_METADATA_SQS_QUEUE_ENV_NAME = "METADATA_SQS_QUEUE_URL" MOCK_LG_INVALID_SQS_QUEUE_ENV_NAME = "INVALID_SQS_QUEUE_URL" +MOCK_BULK_UPLOAD_DYNAMODB_ENV_NAME = "BULK_UPLOAD_DYNAMODB" + MOCK_ARF_TABLE_NAME = "test_arf_dynamoDB_table" MOCK_LG_TABLE_NAME = "test_lg_dynamoDB_table" MOCK_ARF_BUCKET = "test_arf_s3_bucket" @@ -29,6 +31,7 @@ MOCK_LG_STAGING_STORE_BUCKET = "test_staging_bulk_store" MOCK_LG_METADATA_SQS_QUEUE = "test_bulk_upload_metadata_queue" MOCK_LG_INVALID_SQS_QUEUE = "INVALID_SQS_QUEUE_URL" +MOCK_BULK_UPLOAD_DYNAMODB = "test_bulk_upload_dynamodb_table" TEST_NHS_NUMBER = "1111111111" TEST_OBJECT_KEY = "1234-4567-8912-HSDF-TEST" @@ -55,3 +58,4 @@ def set_env(monkeypatch): ) monkeypatch.setenv(MOCK_LG_METADATA_SQS_QUEUE_ENV_NAME, MOCK_LG_METADATA_SQS_QUEUE) monkeypatch.setenv(MOCK_LG_INVALID_SQS_QUEUE_ENV_NAME, MOCK_LG_INVALID_SQS_QUEUE) + monkeypatch.setenv(MOCK_BULK_UPLOAD_DYNAMODB_ENV_NAME, MOCK_BULK_UPLOAD_DYNAMODB) diff --git a/lambdas/tests/unit/models/test_bulk_upload_status.py b/lambdas/tests/unit/models/test_bulk_upload_status.py new file mode 100644 index 000000000..cdd10ecbb --- /dev/null +++ b/lambdas/tests/unit/models/test_bulk_upload_status.py @@ -0,0 +1,49 @@ + +from models.bulk_upload_status import (FailedUpload, SuccessfulUpload, + UploadStatus) + +MOCK_DATA_COMPLETE_UPLOAD = { + "ID": "719ca7a7-9c30-48f3-a472-c3daaf30d548", + "nhs_number": "9000000009", + "timestamp": "1698146661", + "upload_status": "complete", + "file_location_in_bucket": "s3://lloyd-george-bucket/9000000009/test-key-123", +} + +MOCK_DATA_FAILED_UPLOAD = { + "ID": "719ca7a7-9c30-48f3-a472-c3daaf30e975", + "nhs_number": "9000000025", + "timestamp": "169814650", + "upload_status": "failed", + "failure_reason": "File name not matching Lloyd George naming convention", + "file_path_of_failed_file": "s3://staging-bucket/9000000025/invalid_filename.pdf", +} + + +def test_parse_json_into_successful_upload(): + actual = UploadStatus.validate_python(MOCK_DATA_COMPLETE_UPLOAD) + expected = SuccessfulUpload( + ID="719ca7a7-9c30-48f3-a472-c3daaf30d548", + nhs_number="9000000009", + timestamp="1698146661", + upload_status="complete", + file_location_in_bucket="s3://lloyd-george-bucket/9000000009/test-key-123", + ) + + assert isinstance(actual, SuccessfulUpload) + assert actual == expected + + +def test_parse_json_into_failed_upload(): + actual = UploadStatus.validate_python(MOCK_DATA_FAILED_UPLOAD) + expected = FailedUpload( + ID="719ca7a7-9c30-48f3-a472-c3daaf30e975", + nhs_number="9000000025", + timestamp="169814650", + upload_status="failed", + failure_reason="File name not matching Lloyd George naming convention", + file_path_of_failed_file="/9000000025/invalid_filename.pdf", + ) + + assert isinstance(actual, FailedUpload) + assert actual == expected From 53b1913b59ee3455095160554b4a5c5137ce2d01 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 24 Oct 2023 18:03:18 +0100 Subject: [PATCH 09/46] [PRMDR-372] Modify upload status model for easier interaction with dynamodb --- lambdas/models/bulk_upload_status.py | 70 +++++++++++----- lambdas/models/config.py | 4 + .../unit/models/test_bulk_upload_status.py | 80 +++++++++++++++---- 3 files changed, 121 insertions(+), 33 deletions(-) diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py index 9789d057a..53442f09c 100644 --- a/lambdas/models/bulk_upload_status.py +++ b/lambdas/models/bulk_upload_status.py @@ -1,27 +1,61 @@ -from typing import Annotated, Literal, Union +from datetime import datetime +from typing import List, Literal, TypeAlias, Union -from pydantic import BaseModel, Field, TypeAdapter +from models.config import to_capwords +from pydantic import BaseModel, ConfigDict, Field, TypeAdapter +from utils.utilities import create_reference_id -class SuccessfulUpload(BaseModel): - id: str = Field(alias="ID") +class UploadStatusBaseClass(BaseModel): + model_config = ConfigDict(alias_generator=to_capwords, populate_by_name=True) + id: str = Field(alias="ID", default_factory=create_reference_id) nhs_number: str - timestamp: str - upload_status: Literal["complete"] - file_location_in_bucket: str + timestamp: str = Field(default_factory=lambda: timestamp_as_string(now())) + date: str = Field(default_factory=lambda: date_as_string(now())) + file_path: str -class FailedUpload(BaseModel): - id: str = Field(alias="ID") - nhs_number: str - timestamp: str - upload_status: Literal["failed"] +class SuccessfulUpload(UploadStatusBaseClass): + upload_status: Literal["complete"] = "complete" + + +class FailedUpload(UploadStatusBaseClass): + upload_status: Literal["failed"] = "failed" failure_reason: str - file_path_of_failed_file: str -UploadStatus = TypeAdapter( - Annotated[ - Union[SuccessfulUpload, FailedUpload], Field(..., discriminator="upload_status") - ] -) +UploadStatus = TypeAdapter(Union[SuccessfulUpload, FailedUpload]) + +UploadStatusListType: TypeAlias = List[Union[SuccessfulUpload, FailedUpload]] +UploadStatusList = TypeAdapter(UploadStatusListType) + +FieldsToReport = [] + + +def now() -> datetime: + """Helper func for easier mocking, as datetime.now is immutable""" + return datetime.now() + + +def timestamp_as_string(time_now: datetime) -> str: + return str(time_now.timestamp()) + + +def date_as_string(time_now: datetime) -> str: + return time_now.strftime("%Y-%m-%d") + + +# def dump_upload_status_list_as_csv( +# upload_status_list: UploadStatusList, csv_file_path: str +# ): +# pass +# +# +# def store(persons: Iterable[Person]): +# fieldnames = list(Person.schema()["properties"].keys()) +# +# with open("test.csv", "w") as fp: +# writer = csv.DictWriter(fp, fieldnames=fieldnames) +# writer.writeheader() +# for person in persons: +# writer.writerow(json.loads(PersonOut(**person.dict()).json())) diff --git a/lambdas/models/config.py b/lambdas/models/config.py index 77d8b6ee4..6fa41845b 100644 --- a/lambdas/models/config.py +++ b/lambdas/models/config.py @@ -7,3 +7,7 @@ def to_camel(string: str) -> str: conf = ConfigDict(alias_generator=to_camel) + + +def to_capwords(snake_case_string: str) -> str: + return "".join(x.capitalize() for x in snake_case_string.split("_")) diff --git a/lambdas/tests/unit/models/test_bulk_upload_status.py b/lambdas/tests/unit/models/test_bulk_upload_status.py index cdd10ecbb..b99545021 100644 --- a/lambdas/tests/unit/models/test_bulk_upload_status.py +++ b/lambdas/tests/unit/models/test_bulk_upload_status.py @@ -1,49 +1,99 @@ +from datetime import datetime from models.bulk_upload_status import (FailedUpload, SuccessfulUpload, - UploadStatus) + UploadStatus, UploadStatusList) MOCK_DATA_COMPLETE_UPLOAD = { "ID": "719ca7a7-9c30-48f3-a472-c3daaf30d548", - "nhs_number": "9000000009", - "timestamp": "1698146661", - "upload_status": "complete", - "file_location_in_bucket": "s3://lloyd-george-bucket/9000000009/test-key-123", + "NhsNumber": "9000000009", + "Timestamp": "1698146661", + "Date": "2023-10-24", + "UploadStatus": "complete", + "FilePath": "/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", } MOCK_DATA_FAILED_UPLOAD = { "ID": "719ca7a7-9c30-48f3-a472-c3daaf30e975", - "nhs_number": "9000000025", - "timestamp": "169814650", - "upload_status": "failed", - "failure_reason": "File name not matching Lloyd George naming convention", - "file_path_of_failed_file": "s3://staging-bucket/9000000025/invalid_filename.pdf", + "NhsNumber": "9000000025", + "Timestamp": "1698109408", + "Date": "2023-10-24", + "UploadStatus": "failed", + "FailureReason": "File name not matching Lloyd George naming convention", + "FilePath": "/9000000025/invalid_filename.pdf", } def test_parse_json_into_successful_upload(): - actual = UploadStatus.validate_python(MOCK_DATA_COMPLETE_UPLOAD) expected = SuccessfulUpload( ID="719ca7a7-9c30-48f3-a472-c3daaf30d548", nhs_number="9000000009", timestamp="1698146661", + date="2023-10-24", upload_status="complete", - file_location_in_bucket="s3://lloyd-george-bucket/9000000009/test-key-123", + file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", ) + actual = UploadStatus.validate_python(MOCK_DATA_COMPLETE_UPLOAD) + assert isinstance(actual, SuccessfulUpload) assert actual == expected def test_parse_json_into_failed_upload(): - actual = UploadStatus.validate_python(MOCK_DATA_FAILED_UPLOAD) expected = FailedUpload( ID="719ca7a7-9c30-48f3-a472-c3daaf30e975", nhs_number="9000000025", - timestamp="169814650", + timestamp="1698109408", + date="2023-10-24", upload_status="failed", failure_reason="File name not matching Lloyd George naming convention", - file_path_of_failed_file="/9000000025/invalid_filename.pdf", + file_path="/9000000025/invalid_filename.pdf", ) + actual = UploadStatus.validate_python(MOCK_DATA_FAILED_UPLOAD) + assert isinstance(actual, FailedUpload) assert actual == expected + + +def test_parsing_a_list_of_record(): + items = [ + MOCK_DATA_COMPLETE_UPLOAD, + MOCK_DATA_FAILED_UPLOAD, + MOCK_DATA_COMPLETE_UPLOAD, + ] + + actual = UploadStatusList.validate_python(items) + + assert isinstance(actual[0], SuccessfulUpload) + assert isinstance(actual[1], FailedUpload) + assert isinstance(actual[2], SuccessfulUpload) + + +def test_ids_and_timestamp_are_auto_populated_if_not_given(mocker): + mocker.patch( + "models.bulk_upload_status.now", return_value=datetime(2023, 10, 20, 10, 25) + ) + mocker.patch("uuid.uuid4", return_value="mocked_uuid") + + upload_status = SuccessfulUpload( + nhs_number="9000000009", + file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", + ) + + assert upload_status.date == "2023-10-20" + assert upload_status.timestamp == "1697793900.0" + assert upload_status.id == "mocked_uuid" + assert upload_status.upload_status == "complete" + print(upload_status.model_dump()) + + upload_status_failed = FailedUpload( + nhs_number="9000000009", + failure_reason="File name not matching Lloyd George name convention", + file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", + ) + + assert upload_status_failed.date == "2023-10-20" + assert upload_status_failed.timestamp == "1697793900.0" + assert upload_status_failed.id == "mocked_uuid" + assert upload_status_failed.upload_status == "failed" From b2ec60b5d7baa109cbcc61b0c33f74e28da051c5 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 09:15:49 +0100 Subject: [PATCH 10/46] [PRMDR-372] rebase with prmdr-370 --- lambdas/services/bulk_upload_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 2bfac44de..d353a88fd 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -4,7 +4,7 @@ import pydantic from botocore.exceptions import ClientError from enums.metadata_field_names import DocumentReferenceMetadataFields -from models.bulk_upload_status import BulkUploadStatus +from models.bulk_upload_status import FailedUpload from models.nhs_document_reference import NHSDocumentReference from models.staging_metadata import MetadataFile, StagingMetadata from services.dynamo_service import DynamoDBService @@ -150,15 +150,14 @@ def rollback_transaction(self): ) def report_failure(self, nhs_number: str, reason: str, file_path: str): - record = BulkUploadStatus( + record = FailedUpload( nhs_number=nhs_number, failed_reason=reason, - file_path_of_failed_file=file_path, + file_path=file_path, ) self.dynamo_service.create_item( table_name=self.bulk_upload_report_dynamo_table, item=record.model_dump() ) - pass @staticmethod def strip_leading_slash(filepath: str) -> str: From de0e8c6adc6b9e1b82df3f87c53d14c244d074f3 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 11:14:29 +0100 Subject: [PATCH 11/46] [PRMDR-372] Add unit test for bulk upload reporting, remove unwanted comments --- lambdas/models/bulk_upload_status.py | 16 ------ lambdas/services/bulk_upload_service.py | 54 ++++++++++++++----- .../unit/services/test_bulk_upload_service.py | 51 ++++++++++++++++-- 3 files changed, 86 insertions(+), 35 deletions(-) diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py index 53442f09c..324cad1d2 100644 --- a/lambdas/models/bulk_upload_status.py +++ b/lambdas/models/bulk_upload_status.py @@ -43,19 +43,3 @@ def timestamp_as_string(time_now: datetime) -> str: def date_as_string(time_now: datetime) -> str: return time_now.strftime("%Y-%m-%d") - - -# def dump_upload_status_list_as_csv( -# upload_status_list: UploadStatusList, csv_file_path: str -# ): -# pass -# -# -# def store(persons: Iterable[Person]): -# fieldnames = list(Person.schema()["properties"].keys()) -# -# with open("test.csv", "w") as fp: -# writer = csv.DictWriter(fp, fieldnames=fieldnames) -# writer.writeheader() -# for person in persons: -# writer.writerow(json.loads(PersonOut(**person.dict()).json())) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index d353a88fd..1c3774392 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -4,15 +4,14 @@ import pydantic from botocore.exceptions import ClientError from enums.metadata_field_names import DocumentReferenceMetadataFields -from models.bulk_upload_status import FailedUpload +from models.bulk_upload_status import FailedUpload, SuccessfulUpload from models.nhs_document_reference import NHSDocumentReference from models.staging_metadata import MetadataFile, StagingMetadata from services.dynamo_service import DynamoDBService from services.s3_service import S3Service from services.sqs_service import SQSService from utils.exceptions import InvalidMessageException -from utils.lloyd_george_validator import (LGInvalidFilesException, - validate_lg_file_names) +from utils.lloyd_george_validator import LGInvalidFilesException, validate_lg_file_names from utils.utilities import create_reference_id logger = logging.getLogger() @@ -52,12 +51,15 @@ def handle_sqs_message(self, message: dict): try: logger.info("Running validation for file names...") self.validate_files(staging_metadata) - except LGInvalidFilesException as e: + except LGInvalidFilesException as error: logger.info( f"Detected invalid file name related to patient number: {staging_metadata.nhs_number}" ) logger.info("Will stop processing Lloyd George record for this patient") - raise e + + failure_reason = str(error) + self.report_upload_failure(staging_metadata, failure_reason) + return logger.info("Validation complete. Start uploading Lloyd George records") @@ -73,6 +75,14 @@ def handle_sqs_message(self, message: dict): logger.info("Will try to rollback any change to database and bucket") self.rollback_transaction() + self.report_upload_failure( + staging_metadata, + "Validation passed but error occurred during file transfer", + ) + return + + self.report_upload_complete(staging_metadata) + def validate_files(self, staging_metadata: StagingMetadata): # Delegate to lloyd_george_validator service # Expect LGInvalidFilesException to be raised when validation fails @@ -149,15 +159,31 @@ def rollback_transaction(self): f"Failed to rollback the incomplete transaction due to error: {e}" ) - def report_failure(self, nhs_number: str, reason: str, file_path: str): - record = FailedUpload( - nhs_number=nhs_number, - failed_reason=reason, - file_path=file_path, - ) - self.dynamo_service.create_item( - table_name=self.bulk_upload_report_dynamo_table, item=record.model_dump() - ) + def report_upload_complete(self, staging_metadata: StagingMetadata): + nhs_number = staging_metadata.nhs_number + for file in staging_metadata.files: + dynamo_record = SuccessfulUpload( + nhs_number=nhs_number, + file_path=file.file_path, + ) + self.dynamo_service.create_item( + table_name=self.bulk_upload_report_dynamo_table, + item=dynamo_record.model_dump(), + ) + + def report_upload_failure(self, staging_metadata: StagingMetadata, failure_reason: str): + nhs_number = staging_metadata.nhs_number + + for file in staging_metadata.files: + dynamo_record = FailedUpload( + nhs_number=nhs_number, + failure_reason=failure_reason, + file_path=file.file_path, + ) + self.dynamo_service.create_item( + table_name=self.bulk_upload_report_dynamo_table, + item=dynamo_record.model_dump(), + ) @staticmethod def strip_leading_slash(filepath: str) -> str: diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index 350e8a424..62a7e7b1b 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -1,12 +1,22 @@ import pytest from botocore.exceptions import ClientError from services.bulk_upload_service import BulkUploadService -from tests.unit.conftest import (MOCK_LG_BUCKET, MOCK_LG_STAGING_STORE_BUCKET, - MOCK_LG_TABLE_NAME, TEST_OBJECT_KEY) +from tests.unit.conftest import ( + MOCK_LG_BUCKET, + MOCK_LG_STAGING_STORE_BUCKET, + MOCK_LG_TABLE_NAME, + TEST_OBJECT_KEY, +) from tests.unit.helpers.data.bulk_upload.test_data import ( - TEST_DOCUMENT_REFERENCE, TEST_DOCUMENT_REFERENCE_LIST, TEST_FILE_METADATA, - TEST_NHS_NUMBER_FOR_BULK_UPLOAD, TEST_SQS_MESSAGE, TEST_STAGING_METADATA, - TEST_STAGING_METADATA_WITH_INVALID_FILENAME) + TEST_DOCUMENT_REFERENCE, + TEST_DOCUMENT_REFERENCE_LIST, + TEST_FILE_METADATA, + TEST_NHS_NUMBER_FOR_BULK_UPLOAD, + TEST_SQS_MESSAGE, + TEST_STAGING_METADATA, + TEST_STAGING_METADATA_WITH_INVALID_FILENAME, + TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, +) from utils.exceptions import InvalidMessageException from utils.lloyd_george_validator import LGInvalidFilesException @@ -24,12 +34,39 @@ def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validati mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) + mock_report_upload_complete = mocker.patch.object( + BulkUploadService, "report_upload_complete" + ) mocker.patch.object(BulkUploadService, "validate_files", return_value=None) service = BulkUploadService() service.handle_sqs_message(message=TEST_SQS_MESSAGE) mock_create_lg_records_and_copy_files.assert_called_with(TEST_STAGING_METADATA) + mock_report_upload_complete.assert_called() + + +def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_are_invalid( + set_env, mocker, mock_uuid +): + mock_create_lg_records_and_copy_files = mocker.patch.object( + BulkUploadService, "create_lg_records_and_copy_files" + ) + mocked_report_upload_failure = mocker.patch.object( + BulkUploadService, "report_upload_failure" + ) + mocked_error = LGInvalidFilesException( + "One or more of the files do not match naming convention" + ) + mocker.patch.object(BulkUploadService, "validate_files", side_effect=mocked_error) + + service = BulkUploadService() + service.handle_sqs_message(message=TEST_SQS_MESSAGE_WITH_INVALID_FILENAME) + + mock_create_lg_records_and_copy_files.assert_not_called() + mocked_report_upload_failure.assert_called_with( + TEST_STAGING_METADATA_WITH_INVALID_FILENAME, str(mocked_error) + ) def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_transfer_failed_halfway( @@ -38,6 +75,9 @@ def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_t mocked_rollback_transaction = mocker.patch.object( BulkUploadService, "rollback_transaction" ) + mocked_report_upload_failure = mocker.patch.object( + BulkUploadService, "report_upload_failure" + ) service = BulkUploadService() service.s3_service = mocker.MagicMock() service.dynamo_service = mocker.MagicMock() @@ -54,6 +94,7 @@ def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_t service.handle_sqs_message(message=TEST_SQS_MESSAGE) mocked_rollback_transaction.assert_called() + mocked_report_upload_failure.assert_called() def test_handle_sqs_message_raise_InvalidMessageException_when_failed_to_extract_data_from_message( From aa76b10c1f2390e249c0b9484adce003efd6572c Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 11:29:40 +0100 Subject: [PATCH 12/46] [PRMDR-372] Fix mismatch of env var --- lambdas/services/bulk_upload_service.py | 2 +- lambdas/tests/unit/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 1c3774392..6266efdbc 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -29,7 +29,7 @@ def __init__( self.staging_bucket_name = os.environ["STAGING_STORE_BUCKET_NAME"] self.lg_bucket_name = os.environ["LLOYD_GEORGE_BUCKET_NAME"] self.lg_dynamo_table = os.environ["LLOYD_GEORGE_DYNAMODB_NAME"] - self.bulk_upload_report_dynamo_table = os.environ["BULK_UPLOAD_DYNAMODB"] + self.bulk_upload_report_dynamo_table = os.environ["BULK_UPLOAD_DYNAMODB_NAME"] self.invalid_queue_url = os.environ["INVALID_SQS_QUEUE_URL"] self.pdf_content_type = "application/pdf" diff --git a/lambdas/tests/unit/conftest.py b/lambdas/tests/unit/conftest.py index 7e7e2d97b..43d3775f7 100644 --- a/lambdas/tests/unit/conftest.py +++ b/lambdas/tests/unit/conftest.py @@ -20,7 +20,7 @@ MOCK_LG_METADATA_SQS_QUEUE_ENV_NAME = "METADATA_SQS_QUEUE_URL" MOCK_LG_INVALID_SQS_QUEUE_ENV_NAME = "INVALID_SQS_QUEUE_URL" -MOCK_BULK_UPLOAD_DYNAMODB_ENV_NAME = "BULK_UPLOAD_DYNAMODB" +MOCK_BULK_UPLOAD_DYNAMODB_ENV_NAME = "BULK_UPLOAD_DYNAMODB_NAME" MOCK_ARF_TABLE_NAME = "test_arf_dynamoDB_table" MOCK_LG_TABLE_NAME = "test_lg_dynamoDB_table" From 8f28dbc2425bee209a3c24583e0aee646da53071 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 11:38:02 +0100 Subject: [PATCH 13/46] [PRMDR-372] fix misspelling, fix missing by_alias=True --- lambdas/services/bulk_upload_service.py | 4 ++-- .../unit/utils/test_lloyd_george_validator.py | 14 +++++++------- lambdas/utils/lloyd_george_validator.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 6266efdbc..ce1bfc357 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -168,7 +168,7 @@ def report_upload_complete(self, staging_metadata: StagingMetadata): ) self.dynamo_service.create_item( table_name=self.bulk_upload_report_dynamo_table, - item=dynamo_record.model_dump(), + item=dynamo_record.model_dump(by_alias=True), ) def report_upload_failure(self, staging_metadata: StagingMetadata, failure_reason: str): @@ -182,7 +182,7 @@ def report_upload_failure(self, staging_metadata: StagingMetadata, failure_reaso ) self.dynamo_service.create_item( table_name=self.bulk_upload_report_dynamo_table, - item=dynamo_record.model_dump(), + item=dynamo_record.model_dump(by_alias=True), ) @staticmethod diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index e11ca2d34..8cfebe5bf 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -139,7 +139,7 @@ def test_validate_nhs_id_with_pds_service(mocker): validate_with_pds_service(lg_file_list, "9000000009") - mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_called_once() def test_mismatch_nhs_id_no_pds_service(mocker): @@ -176,7 +176,7 @@ def test_mismatch_name_with_pds_service(mocker): validate_with_pds_service(lg_file_list, "9000000009") - mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_not_called() def test_mismatch_ods_with_pds_service(mocker): @@ -196,7 +196,7 @@ def test_mismatch_ods_with_pds_service(mocker): validate_with_pds_service(lg_file_list, "9000000009") - mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_called_once() def test_mismatch_dob_with_pds_service(mocker): @@ -216,7 +216,7 @@ def test_mismatch_dob_with_pds_service(mocker): validate_with_pds_service(lg_file_list, "9000000009") - mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_not_called() def test_patient_not_found_with_pds_service(mocker): @@ -235,7 +235,7 @@ def test_patient_not_found_with_pds_service(mocker): validate_with_pds_service(lg_file_list, "9000000009") - mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_not_called() def test_bad_request_with_pds_service(mocker): @@ -254,7 +254,7 @@ def test_bad_request_with_pds_service(mocker): validate_with_pds_service(lg_file_list, "9000000009") - mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_not_called() def test_raise_client_error_from_ssm_with_pds_service(mocker): @@ -277,5 +277,5 @@ def test_raise_client_error_from_ssm_with_pds_service(mocker): validate_with_pds_service(lg_file_list, "9000000009") - mock_pds_call.assert_called_with(nsh_number="9000000009", retry_on_expired=True) + mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_called_once() diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index ad5192763..026f5732d 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -109,7 +109,7 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): date_of_birth = file_name_info["date_of_birth"] pds_service = PdsApiService(SSMService()) - pds_response = pds_service.pds_request(nsh_number=nhs_number, retry_on_expired=True) + pds_response = pds_service.pds_request(nhs_number=nhs_number, retry_on_expired=True) pds_response.raise_for_status() patient = Patient.model_validate(pds_response.json()) patient_details = patient.get_minimum_patient_details(nhs_number) From 32cf37641f634137c1d7a520b1d04b241e70eef6 Mon Sep 17 00:00:00 2001 From: Rio Knightley Date: Wed, 25 Oct 2023 11:44:16 +0100 Subject: [PATCH 14/46] Add bulk upload validation for virus scanner --- .../DocumentSearchResultsOptions.tsx | 2 +- .../helpers/requests/getPresignedUrlForZip.ts | 2 +- lambdas/services/bulk_upload_service.py | 55 ++++++++++++++++++- lambdas/services/s3_service.py | 14 +++++ lambdas/utils/exceptions.py | 8 +++ 5 files changed, 76 insertions(+), 5 deletions(-) diff --git a/app/src/components/blocks/documentSearchResultsOptions/DocumentSearchResultsOptions.tsx b/app/src/components/blocks/documentSearchResultsOptions/DocumentSearchResultsOptions.tsx index 0f19ab225..478064dec 100644 --- a/app/src/components/blocks/documentSearchResultsOptions/DocumentSearchResultsOptions.tsx +++ b/app/src/components/blocks/documentSearchResultsOptions/DocumentSearchResultsOptions.tsx @@ -44,7 +44,7 @@ const DocumentSearchResultsOptions = (props: Props) => { nhsNumber: props.nhsNumber, baseUrl: baseUrl, baseHeaders, - docType: DOCUMENT_TYPE.ARF, + docType: [DOCUMENT_TYPE.ARF, DOCUMENT_TYPE.LLOYD_GEORGE], }); const filename = `patient-record-${props.nhsNumber}`; diff --git a/app/src/helpers/requests/getPresignedUrlForZip.ts b/app/src/helpers/requests/getPresignedUrlForZip.ts index b197612f0..555d2d56e 100644 --- a/app/src/helpers/requests/getPresignedUrlForZip.ts +++ b/app/src/helpers/requests/getPresignedUrlForZip.ts @@ -7,7 +7,7 @@ type Args = { nhsNumber: string; baseUrl: string; baseHeaders: AuthHeaders; - docType: DOCUMENT_TYPE; + docType: DOCUMENT_TYPE | Array; }; type GetPresignedUrl = { diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 29ff0ce19..710458bbd 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -9,7 +9,8 @@ from services.dynamo_service import DynamoDBService from services.s3_service import S3Service from services.sqs_service import SQSService -from utils.exceptions import InvalidMessageException +from utils.exceptions import (InvalidMessageException, VirusFailedException, + VirusNoResultException) from utils.lloyd_george_validator import (LGInvalidFilesException, validate_lg_file_names) from utils.utilities import create_reference_id @@ -30,7 +31,7 @@ def __init__( self.lg_bucket_name = os.environ["LLOYD_GEORGE_BUCKET_NAME"] self.lg_dynamo_table = os.environ["LLOYD_GEORGE_DYNAMODB_NAME"] self.invalid_queue_url = os.environ["INVALID_SQS_QUEUE_URL"] - + self.metadata_queue_url = os.environ["METADATA_SQS_QUEUE_URL"] self.pdf_content_type = "application/pdf" self.dynamo_records_in_transaction: list[NHSDocumentReference] = [] self.dest_bucket_files_in_transaction = [] @@ -57,7 +58,27 @@ def handle_sqs_message(self, message: dict): logger.info("Will stop processing Lloyd George record for this patient") raise e - logger.info("Validation complete. Start uploading Lloyd George records") + logger.info("File validation complete. Checking virus scan results") + + try: + logger.info("Running validation for virus scan results...") + self.check_virus_result(staging_metadata) + except VirusNoResultException as e: + logger.info( + f"Waiting on virus scan results for: {staging_metadata.nhs_number}, adding message back to queue" + ) + logger.info("Will stop processing Lloyd George record for this patient") + raise e + except VirusFailedException as e: + logger.info( + f"Virus scan results check failed for: {staging_metadata.nhs_number}, removing from queue" + ) + logger.info("Will stop processing Lloyd George record for this patient") + raise e + + logger.info( + "Virus result validation complete. Start uploading Lloyd George records" + ) self.init_transaction() @@ -77,8 +98,36 @@ def validate_files(self, staging_metadata: StagingMetadata): file_names = [ os.path.basename(metadata.file_path) for metadata in staging_metadata.files ] + validate_lg_file_names(file_names, staging_metadata.nhs_number) + def check_virus_result(self, staging_metadata: StagingMetadata): + for file_metadata in staging_metadata.files: + source_file_key = self.strip_leading_slash(file_metadata.file_path) + scan_result = self.s3_service.get_tag_value( + self, self.staging_bucket_name, source_file_key, "scan-result" + ) + if scan_result == "Clean": + pass + elif scan_result == "Infected": + logger.info( + f"Found infected document(s) for scanId: {file_metadata.scan_id}" + ) + raise VirusFailedException + else: + sqs_service = SQSService() + nhs_number = staging_metadata.nhs_number + logger.info( + f"Found no virus scan results for scanId: {file_metadata.scan_id}" + ) + + sqs_service.send_message_with_nhs_number_attr( + queue_url=self.metadata_queue_url, + message_body=staging_metadata.model_dump_json(by_alias=True), + nhs_number=nhs_number, + ) + raise VirusNoResultException + def init_transaction(self): self.dynamo_records_in_transaction = [] self.dest_bucket_files_in_transaction = [] diff --git a/lambdas/services/s3_service.py b/lambdas/services/s3_service.py index 9e0168dcb..70c14f504 100644 --- a/lambdas/services/s3_service.py +++ b/lambdas/services/s3_service.py @@ -3,6 +3,7 @@ import boto3 from botocore.client import Config as BotoConfig +from lambdas.utils.exceptions import TagNotFoundException logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -65,3 +66,16 @@ def copy_across_bucket( def delete_object(self, s3_bucket_name: str, file_key: str): return self.client.delete_object(Bucket=s3_bucket_name, Key=file_key) + + def get_tag_value(self, s3_bucket_name: str, file_key: str, tag_key: str): + response = self.client.get_object_tagging( + Bucket=s3_bucket_name, + Key=file_key, + ) + for key_value_pair in response["TagSet"]: + if key_value_pair["Key"] == tag_key: + status = str(key_value_pair["Value"]) + if status: + return status + else: + raise TagNotFoundException diff --git a/lambdas/utils/exceptions.py b/lambdas/utils/exceptions.py index 316342ad7..276005939 100644 --- a/lambdas/utils/exceptions.py +++ b/lambdas/utils/exceptions.py @@ -50,3 +50,11 @@ class LGFileTypeException(ValueError): class InvalidMessageException(Exception): pass + + +class VirusNoResultException(Exception): + pass + + +class VirusFailedException(Exception): + pass From c70f223f4ae70d944022a11db86a816f87d0f614 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 12:10:17 +0100 Subject: [PATCH 15/46] [PRMDR-372] hotfix to use mock pds service in LG validation --- lambdas/utils/lloyd_george_validator.py | 5 +++-- lambdas/utils/utilities.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 026f5732d..fe9b49e18 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -10,8 +10,8 @@ from enums.pds_ssm_parameters import SSMParameter from models.nhs_document_reference import NHSDocumentReference from models.pds_models import Patient -from services.pds_api_service import PdsApiService from services.ssm_service import SSMService +from utils.utilities import get_pds_service logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -108,7 +108,8 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): patient_name = file_name_info["patient_name"] date_of_birth = file_name_info["date_of_birth"] - pds_service = PdsApiService(SSMService()) + pds_service_class = get_pds_service() + pds_service = pds_service_class(SSMService()) pds_response = pds_service.pds_request(nhs_number=nhs_number, retry_on_expired=True) pds_response.raise_for_status() patient = Patient.model_validate(pds_response.json()) diff --git a/lambdas/utils/utilities.py b/lambdas/utils/utilities.py index d1662a2cb..2fb357cf5 100755 --- a/lambdas/utils/utilities.py +++ b/lambdas/utils/utilities.py @@ -1,7 +1,11 @@ +import os import re import uuid from inflection import camelize + +from services.mock_pds_service import MockPdsApiService +from services.pds_api_service import PdsApiService from utils.exceptions import InvalidResourceIdException @@ -34,3 +38,11 @@ def camelize_dict(data: dict) -> dict: def create_reference_id() -> str: return str(uuid.uuid4()) + + +def get_pds_service(): + return ( + PdsApiService + if (os.getenv("PDS_FHIR_IS_STUBBED") == "false") + else MockPdsApiService + ) \ No newline at end of file From 8f2a7e110db05bbdb4aabcdfa6fc228497dbaf99 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 12:18:36 +0100 Subject: [PATCH 16/46] [PRMDR-372] replace mocked pds patient ods code --- lambdas/services/mock_data/pds_patient.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lambdas/services/mock_data/pds_patient.json b/lambdas/services/mock_data/pds_patient.json index 80a303bbc..323e418df 100644 --- a/lambdas/services/mock_data/pds_patient.json +++ b/lambdas/services/mock_data/pds_patient.json @@ -80,10 +80,9 @@ "type": "Organization", "identifier": { "system": "https://fhir.nhs.uk/Id/ods-organization-code", - "value": "Y12345", + "value": "H81109", "period": { - "start": "2020-01-01", - "end": "2021-12-31" + "start": "2020-01-01" } } } From 3f38eb070773242000bc49d8b29e97880fceeb91 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 14:46:10 +0100 Subject: [PATCH 17/46] [PRMDR-372] Fix unit test that was broken during merge --- .../unit/utils/test_lloyd_george_validator.py | 57 +++++++------------ 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index b9fe438fb..994e6d986 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -126,7 +126,7 @@ def test_files_for_different_patients(): assert e == LGInvalidFilesException("File names does not match with each other") -def test_validate_nhs_id_with_pds_service(mocker): +def test_validate_nhs_id_with_pds_service(mocker, mock_pds_call): response = Response() response.status_code = 200 response._content = json.dumps(PDS_PATIENT).encode("utf-8") @@ -134,10 +134,7 @@ def test_validate_nhs_id_with_pds_service(mocker): "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", ] - mock_pds_call = mocker.patch( - "services.pds_api_service.PdsApiService.pds_request", - return_value=response, - ) + mock_pds_call.return_value = response mock_odc_code = mocker.patch( "utils.lloyd_george_validator.get_user_ods_code", return_value="Y12345" ) @@ -148,12 +145,11 @@ def test_validate_nhs_id_with_pds_service(mocker): mock_odc_code.assert_called_once() -def test_mismatch_nhs_id_no_pds_service(mocker): +def test_mismatch_nhs_id_no_pds_service(mocker, mock_pds_call): lg_file_list = [ "1of2_Lloyd_George_Record_[Jane Smith]_[9000000005]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Smith]_[9000000005]_[22-10-2010].pdf", ] - mock_pds_call = mocker.patch("services.pds_api_service.PdsApiService.pds_request") mock_odc_code = mocker.patch( "utils.lloyd_george_validator.get_user_ods_code", return_value="Y12345" ) @@ -165,7 +161,7 @@ def test_mismatch_nhs_id_no_pds_service(mocker): mock_odc_code.assert_not_called() -def test_mismatch_name_with_pds_service(mocker): +def test_mismatch_name_with_pds_service(mocker, mock_pds_call): response = Response() response.status_code = 200 response._content = json.dumps(PDS_PATIENT).encode("utf-8") @@ -173,10 +169,7 @@ def test_mismatch_name_with_pds_service(mocker): "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", ] - mock_pds_call = mocker.patch( - "services.pds_api_service.PdsApiService.pds_request", - return_value=response, - ) + mock_pds_call.return_value = response mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") with pytest.raises(LGInvalidFilesException): @@ -186,7 +179,7 @@ def test_mismatch_name_with_pds_service(mocker): mock_odc_code.assert_not_called() -def test_mismatch_ods_with_pds_service(mocker): +def test_mismatch_ods_with_pds_service(mocker, mock_pds_call): response = Response() response.status_code = 200 response._content = json.dumps(PDS_PATIENT).encode("utf-8") @@ -194,10 +187,7 @@ def test_mismatch_ods_with_pds_service(mocker): "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", ] - mock_pds_call = mocker.patch( - "services.pds_api_service.PdsApiService.pds_request", - return_value=response, - ) + mock_pds_call.return_value = response mock_odc_code = mocker.patch( "utils.lloyd_george_validator.get_user_ods_code", return_value="H98765" ) @@ -209,7 +199,7 @@ def test_mismatch_ods_with_pds_service(mocker): mock_odc_code.assert_called_once() -def test_mismatch_dob_with_pds_service(mocker): +def test_mismatch_dob_with_pds_service(mocker, mock_pds_call): response = Response() response.status_code = 200 response._content = json.dumps(PDS_PATIENT).encode("utf-8") @@ -217,10 +207,7 @@ def test_mismatch_dob_with_pds_service(mocker): "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[14-01-2000].pdf", "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[14-01-2000].pdf", ] - mock_pds_call = mocker.patch( - "services.pds_api_service.PdsApiService.pds_request", - return_value=response, - ) + mock_pds_call.return_value = response mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") with pytest.raises(LGInvalidFilesException): @@ -230,17 +217,14 @@ def test_mismatch_dob_with_pds_service(mocker): mock_odc_code.assert_not_called() -def test_patient_not_found_with_pds_service(mocker): +def test_patient_not_found_with_pds_service(mocker, mock_pds_call): response = Response() response.status_code = 404 lg_file_list = [ "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", ] - mock_pds_call = mocker.patch( - "services.pds_api_service.PdsApiService.pds_request", - return_value=response, - ) + mock_pds_call.return_value = response mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") with pytest.raises(LGInvalidFilesException): @@ -250,17 +234,14 @@ def test_patient_not_found_with_pds_service(mocker): mock_odc_code.assert_not_called() -def test_bad_request_with_pds_service(mocker): +def test_bad_request_with_pds_service(mocker, mock_pds_call): response = Response() response.status_code = 400 lg_file_list = [ "1of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Plain Smith]_[9000000009]_[22-10-2010].pdf", ] - mock_pds_call = mocker.patch( - "services.pds_api_service.PdsApiService.pds_request", - return_value=response, - ) + mock_pds_call.return_value = response mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") with pytest.raises(LGInvalidFilesException): @@ -270,7 +251,7 @@ def test_bad_request_with_pds_service(mocker): mock_odc_code.assert_not_called() -def test_raise_client_error_from_ssm_with_pds_service(mocker): +def test_raise_client_error_from_ssm_with_pds_service(mocker, mock_pds_call): response = Response() response.status_code = 200 response._content = json.dumps(PDS_PATIENT).encode("utf-8") @@ -281,10 +262,7 @@ def test_raise_client_error_from_ssm_with_pds_service(mocker): "1of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", "2of2_Lloyd_George_Record_[Jane Smith]_[9000000009]_[22-10-2010].pdf", ] - mock_pds_call = mocker.patch( - "services.pds_api_service.PdsApiService.pds_request", - return_value=response, - ) + mock_pds_call.return_value = response mock_odc_code = mocker.patch( "utils.lloyd_george_validator.get_user_ods_code", return_value=mock_client_error ) @@ -294,3 +272,8 @@ def test_raise_client_error_from_ssm_with_pds_service(mocker): mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_called_once() + + +@pytest.fixture +def mock_pds_call(mocker): + yield mocker.patch("services.mock_pds_service.MockPdsApiService.pds_request") From ab5922700a86546b0b5ec4bdcd4969d501c93b82 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 14:46:31 +0100 Subject: [PATCH 18/46] [PRMDR-372] run formatter --- lambdas/models/pds_models.py | 1 - lambdas/services/bulk_upload_service.py | 7 +++++-- lambdas/tests/unit/models/test_pds_models.py | 10 +++------ .../unit/services/test_bulk_upload_service.py | 21 ++++++------------- .../unit/services/test_pds_api_service.py | 4 +--- .../unit/utils/test_lloyd_george_validator.py | 12 +++-------- lambdas/utils/lloyd_george_validator.py | 11 +++++----- lambdas/utils/utilities.py | 3 +-- 8 files changed, 25 insertions(+), 44 deletions(-) diff --git a/lambdas/models/pds_models.py b/lambdas/models/pds_models.py index 8d1617b39..2c9f970f0 100644 --- a/lambdas/models/pds_models.py +++ b/lambdas/models/pds_models.py @@ -70,7 +70,6 @@ class Patient(BaseModel): meta: Meta general_practitioner: Optional[list[GeneralPractitioner]] = [] - def get_security(self) -> Security: security = self.meta.security[0] if self.meta.security[0] else None if not security: diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index ce1bfc357..de55fde22 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -11,7 +11,8 @@ from services.s3_service import S3Service from services.sqs_service import SQSService from utils.exceptions import InvalidMessageException -from utils.lloyd_george_validator import LGInvalidFilesException, validate_lg_file_names +from utils.lloyd_george_validator import (LGInvalidFilesException, + validate_lg_file_names) from utils.utilities import create_reference_id logger = logging.getLogger() @@ -171,7 +172,9 @@ def report_upload_complete(self, staging_metadata: StagingMetadata): item=dynamo_record.model_dump(by_alias=True), ) - def report_upload_failure(self, staging_metadata: StagingMetadata, failure_reason: str): + def report_upload_failure( + self, staging_metadata: StagingMetadata, failure_reason: str + ): nhs_number = staging_metadata.nhs_number for file in staging_metadata.files: diff --git a/lambdas/tests/unit/models/test_pds_models.py b/lambdas/tests/unit/models/test_pds_models.py index 373e7b491..c7db8c970 100644 --- a/lambdas/tests/unit/models/test_pds_models.py +++ b/lambdas/tests/unit/models/test_pds_models.py @@ -1,14 +1,10 @@ import pytest from freezegun import freeze_time - from models.pds_models import PatientDetails -from tests.unit.helpers.data.pds.utils import create_patient from tests.unit.helpers.data.pds.pds_patient_response import ( - PDS_PATIENT_WITHOUT_ACTIVE_GP, - PDS_PATIENT_WITH_GP_END_DATE, - PDS_PATIENT, - PDS_PATIENT_RESTRICTED, -) + PDS_PATIENT, PDS_PATIENT_RESTRICTED, PDS_PATIENT_WITH_GP_END_DATE, + PDS_PATIENT_WITHOUT_ACTIVE_GP) +from tests.unit.helpers.data.pds.utils import create_patient from utils.exceptions import InvalidResourceIdException from utils.utilities import validate_id diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index 62a7e7b1b..a79bd7370 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -1,22 +1,13 @@ import pytest from botocore.exceptions import ClientError from services.bulk_upload_service import BulkUploadService -from tests.unit.conftest import ( - MOCK_LG_BUCKET, - MOCK_LG_STAGING_STORE_BUCKET, - MOCK_LG_TABLE_NAME, - TEST_OBJECT_KEY, -) +from tests.unit.conftest import (MOCK_LG_BUCKET, MOCK_LG_STAGING_STORE_BUCKET, + MOCK_LG_TABLE_NAME, TEST_OBJECT_KEY) from tests.unit.helpers.data.bulk_upload.test_data import ( - TEST_DOCUMENT_REFERENCE, - TEST_DOCUMENT_REFERENCE_LIST, - TEST_FILE_METADATA, - TEST_NHS_NUMBER_FOR_BULK_UPLOAD, - TEST_SQS_MESSAGE, - TEST_STAGING_METADATA, - TEST_STAGING_METADATA_WITH_INVALID_FILENAME, - TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, -) + TEST_DOCUMENT_REFERENCE, TEST_DOCUMENT_REFERENCE_LIST, TEST_FILE_METADATA, + TEST_NHS_NUMBER_FOR_BULK_UPLOAD, TEST_SQS_MESSAGE, + TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, TEST_STAGING_METADATA, + TEST_STAGING_METADATA_WITH_INVALID_FILENAME) from utils.exceptions import InvalidMessageException from utils.lloyd_george_validator import LGInvalidFilesException diff --git a/lambdas/tests/unit/services/test_pds_api_service.py b/lambdas/tests/unit/services/test_pds_api_service.py index 370d45e5b..cc448c6c3 100644 --- a/lambdas/tests/unit/services/test_pds_api_service.py +++ b/lambdas/tests/unit/services/test_pds_api_service.py @@ -4,14 +4,12 @@ from botocore.exceptions import ClientError from enums.pds_ssm_parameters import SSMParameter from requests import Response -from tests.unit.helpers.mock_services import FakeSSMService - from services.pds_api_service import PdsApiService from tests.unit.helpers.data.pds.access_token_response import RESPONSE_TOKEN from tests.unit.helpers.data.pds.pds_patient_response import PDS_PATIENT +from tests.unit.helpers.mock_services import FakeSSMService from utils.exceptions import PdsErrorException - fake_ssm_service = FakeSSMService() pds_service = PdsApiService(fake_ssm_service) diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index 994e6d986..784ea2c56 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -3,18 +3,12 @@ import pytest from botocore.exceptions import ClientError from requests import Response - from tests.unit.helpers.data.pds.pds_patient_response import PDS_PATIENT from utils.lloyd_george_validator import ( - LGInvalidFilesException, - check_for_duplicate_files, + LGInvalidFilesException, check_for_duplicate_files, check_for_file_names_agrees_with_each_other, - check_for_number_of_files_match_expected, - extract_info_from_filename, - validate_file_name, - validate_lg_file_type, - validate_with_pds_service, -) + check_for_number_of_files_match_expected, extract_info_from_filename, + validate_file_name, validate_lg_file_type, validate_with_pds_service) def test_catching_error_when_file_type_not_pdf(): diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index fb82170d2..18102076b 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -4,12 +4,11 @@ from typing import Optional from botocore.exceptions import ClientError -from pydantic import ValidationError -from requests import HTTPError - from enums.pds_ssm_parameters import SSMParameter from models.nhs_document_reference import NHSDocumentReference from models.pds_models import Patient +from pydantic import ValidationError +from requests import HTTPError from services.ssm_service import SSMService from utils.utilities import get_pds_service @@ -67,10 +66,12 @@ def validate_lg_file_names(file_name_list: list[str], nhs_number: Optional[str] check_for_duplicate_files(file_name_list) check_for_file_names_agrees_with_each_other(file_name_list) - if nhs_number: # Check file names match with the nhs number in metadata.csv - validate_with_pds_service(file_name_list, nhs_number,) + validate_with_pds_service( + file_name_list, + nhs_number, + ) def extract_info_from_filename(filename: str) -> dict: diff --git a/lambdas/utils/utilities.py b/lambdas/utils/utilities.py index 2fb357cf5..1c834c48b 100755 --- a/lambdas/utils/utilities.py +++ b/lambdas/utils/utilities.py @@ -3,7 +3,6 @@ import uuid from inflection import camelize - from services.mock_pds_service import MockPdsApiService from services.pds_api_service import PdsApiService from utils.exceptions import InvalidResourceIdException @@ -45,4 +44,4 @@ def get_pds_service(): PdsApiService if (os.getenv("PDS_FHIR_IS_STUBBED") == "false") else MockPdsApiService - ) \ No newline at end of file + ) From 16caf7d8454ef1fa18bafe88a7f3f165cfbda403 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 14:56:45 +0100 Subject: [PATCH 19/46] Merge branch prmdr-369-squash into prmdr-372-subtask --- lambdas/services/bulk_upload_service.py | 4 ++-- lambdas/services/s3_service.py | 1 + lambdas/tests/unit/services/test_bulk_upload_service.py | 1 + lambdas/utils/exceptions.py | 4 ++++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 675e46042..2d6884e89 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -118,10 +118,10 @@ def check_virus_result(self, staging_metadata: StagingMetadata): for file_metadata in staging_metadata.files: source_file_key = self.strip_leading_slash(file_metadata.file_path) scan_result = self.s3_service.get_tag_value( - self, self.staging_bucket_name, source_file_key, "scan-result" + self.staging_bucket_name, source_file_key, "scan-result" ) if scan_result == "Clean": - pass + continue elif scan_result == "Infected": logger.info( f"Found infected document(s) for scanId: {file_metadata.scan_id}" diff --git a/lambdas/services/s3_service.py b/lambdas/services/s3_service.py index 58a1d5faf..408437a98 100644 --- a/lambdas/services/s3_service.py +++ b/lambdas/services/s3_service.py @@ -85,6 +85,7 @@ def get_tag_value(self, s3_bucket_name: str, file_key: str, tag_key: str): Bucket=s3_bucket_name, Key=file_key, ) + status = None for key_value_pair in response["TagSet"]: if key_value_pair["Key"] == tag_key: status = str(key_value_pair["Value"]) diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index a79bd7370..212a3bc0b 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -28,6 +28,7 @@ def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validati mock_report_upload_complete = mocker.patch.object( BulkUploadService, "report_upload_complete" ) + mocker.patch.object(BulkUploadService, "validate_files", return_value=None) service = BulkUploadService() diff --git a/lambdas/utils/exceptions.py b/lambdas/utils/exceptions.py index 1b0254fdc..f78595b24 100644 --- a/lambdas/utils/exceptions.py +++ b/lambdas/utils/exceptions.py @@ -62,3 +62,7 @@ class VirusNoResultException(Exception): class VirusFailedException(Exception): pass + + +class TagNotFoundException(Exception): + pass From 996e667cc08053e62f5e7ee21ee8afe87fd0bb28 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 15:06:27 +0100 Subject: [PATCH 20/46] [PRMDR-372] Fix import error --- lambdas/services/bulk_upload_service.py | 17 +++++++++++------ lambdas/services/s3_service.py | 2 +- .../unit/services/test_document_service.py | 2 +- .../decorators/test_validate_document_type.py | 2 +- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 2d6884e89..469b070c0 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -10,10 +10,12 @@ from services.dynamo_service import DynamoDBService from services.s3_service import S3Service from services.sqs_service import SQSService -from utils.exceptions import (InvalidMessageException, VirusFailedException, - VirusNoResultException) -from utils.lloyd_george_validator import (LGInvalidFilesException, - validate_lg_file_names) +from utils.exceptions import ( + InvalidMessageException, + VirusFailedException, + VirusNoResultException, +) +from utils.lloyd_george_validator import LGInvalidFilesException, validate_lg_file_names from utils.utilities import create_reference_id logger = logging.getLogger() @@ -73,13 +75,16 @@ def handle_sqs_message(self, message: dict): f"Waiting on virus scan results for: {staging_metadata.nhs_number}, adding message back to queue" ) logger.info("Will stop processing Lloyd George record for this patient") - raise e + return except VirusFailedException as e: logger.info( f"Virus scan results check failed for: {staging_metadata.nhs_number}, removing from queue" ) logger.info("Will stop processing Lloyd George record for this patient") - raise e + + failure_reason = "One or more of the files failed virus scanner check" + self.report_upload_failure(staging_metadata, failure_reason) + return logger.info( "Virus result validation complete. Start uploading Lloyd George records" diff --git a/lambdas/services/s3_service.py b/lambdas/services/s3_service.py index 408437a98..20d9d5d08 100644 --- a/lambdas/services/s3_service.py +++ b/lambdas/services/s3_service.py @@ -3,7 +3,7 @@ import boto3 from botocore.client import Config as BotoConfig -from lambdas.utils.exceptions import TagNotFoundException +from utils.exceptions import TagNotFoundException logger = logging.getLogger() logger.setLevel(logging.INFO) diff --git a/lambdas/tests/unit/services/test_document_service.py b/lambdas/tests/unit/services/test_document_service.py index 61302bb72..92680d935 100644 --- a/lambdas/tests/unit/services/test_document_service.py +++ b/lambdas/tests/unit/services/test_document_service.py @@ -7,7 +7,7 @@ from tests.unit.helpers.data.dynamo_responses import (MOCK_EMPTY_RESPONSE, MOCK_SEARCH_RESPONSE) -from lambdas.services.document_service import DocumentService +from services.document_service import DocumentService @pytest.fixture diff --git a/lambdas/tests/unit/utils/decorators/test_validate_document_type.py b/lambdas/tests/unit/utils/decorators/test_validate_document_type.py index 06b8556f4..7c830c2ef 100755 --- a/lambdas/tests/unit/utils/decorators/test_validate_document_type.py +++ b/lambdas/tests/unit/utils/decorators/test_validate_document_type.py @@ -1,6 +1,6 @@ from utils.lambda_response import ApiGatewayResponse -from lambdas.utils.decorators.validate_document_type import \ +from utils.decorators.validate_document_type import \ validate_document_type From 4665df15212ca6acd6c8e2c31c88d9ea92b7bf3f Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 16:20:48 +0100 Subject: [PATCH 21/46] [PRMDR-372] modify bulk upload lambda --- lambdas/models/bulk_upload_status.py | 6 +-- lambdas/services/bulk_upload_service.py | 48 ++++++++++++------- lambdas/services/s3_service.py | 1 + lambdas/services/sqs_service.py | 8 +++- .../unit/models/test_bulk_upload_status.py | 8 ++-- .../unit/services/test_bulk_upload_service.py | 31 ++++++++---- .../tests/unit/services/test_sqs_service.py | 1 + 7 files changed, 67 insertions(+), 36 deletions(-) diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py index 324cad1d2..e65395a0b 100644 --- a/lambdas/models/bulk_upload_status.py +++ b/lambdas/models/bulk_upload_status.py @@ -10,7 +10,7 @@ class UploadStatusBaseClass(BaseModel): model_config = ConfigDict(alias_generator=to_capwords, populate_by_name=True) id: str = Field(alias="ID", default_factory=create_reference_id) nhs_number: str - timestamp: str = Field(default_factory=lambda: timestamp_as_string(now())) + timestamp: int = Field(default_factory=lambda: int(now().timestamp())) date: str = Field(default_factory=lambda: date_as_string(now())) file_path: str @@ -37,9 +37,5 @@ def now() -> datetime: return datetime.now() -def timestamp_as_string(time_now: datetime) -> str: - return str(time_now.timestamp()) - - def date_as_string(time_now: datetime) -> str: return time_now.strftime("%Y-%m-%d") diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 469b070c0..9dbb630a9 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -14,6 +14,7 @@ InvalidMessageException, VirusFailedException, VirusNoResultException, + TagNotFoundException, ) from utils.lloyd_george_validator import LGInvalidFilesException, validate_lg_file_names from utils.utilities import create_reference_id @@ -122,29 +123,40 @@ def validate_files(self, staging_metadata: StagingMetadata): def check_virus_result(self, staging_metadata: StagingMetadata): for file_metadata in staging_metadata.files: source_file_key = self.strip_leading_slash(file_metadata.file_path) - scan_result = self.s3_service.get_tag_value( - self.staging_bucket_name, source_file_key, "scan-result" - ) - if scan_result == "Clean": - continue - elif scan_result == "Infected": - logger.info( - f"Found infected document(s) for scanId: {file_metadata.scan_id}" + try: + scan_result = self.s3_service.get_tag_value( + self.staging_bucket_name, source_file_key, "scan-result" ) - raise VirusFailedException - else: - sqs_service = SQSService() - nhs_number = staging_metadata.nhs_number + if scan_result == "Clean": + continue + elif scan_result == "Infected": + logger.info( + f"Found infected document(s) for scanId: {file_metadata.scan_id}" + ) + raise VirusFailedException + else: + logger.info( + f"Found no virus scan results for scanId: {file_metadata.scan_id}" + ) + self.put_message_back_to_queue(staging_metadata) + + except TagNotFoundException: logger.info( f"Found no virus scan results for scanId: {file_metadata.scan_id}" ) + self.put_message_back_to_queue(staging_metadata) - sqs_service.send_message_with_nhs_number_attr( - queue_url=self.metadata_queue_url, - message_body=staging_metadata.model_dump_json(by_alias=True), - nhs_number=nhs_number, - ) - raise VirusNoResultException + def put_message_back_to_queue(self, staging_metadata: StagingMetadata): + sqs_service = SQSService() + nhs_number = staging_metadata.nhs_number + + sqs_service.send_message_with_nhs_number_attr( + queue_url=self.metadata_queue_url, + message_body=staging_metadata.model_dump_json(by_alias=True), + nhs_number=nhs_number, + delay_second=60 * 5, + ) + raise VirusNoResultException def init_transaction(self): self.dynamo_records_in_transaction = [] diff --git a/lambdas/services/s3_service.py b/lambdas/services/s3_service.py index 20d9d5d08..ba8a98a69 100644 --- a/lambdas/services/s3_service.py +++ b/lambdas/services/s3_service.py @@ -3,6 +3,7 @@ import boto3 from botocore.client import Config as BotoConfig + from utils.exceptions import TagNotFoundException logger = logging.getLogger() diff --git a/lambdas/services/sqs_service.py b/lambdas/services/sqs_service.py index 6662d0c7f..5b2e99aeb 100644 --- a/lambdas/services/sqs_service.py +++ b/lambdas/services/sqs_service.py @@ -1,4 +1,5 @@ import logging +from typing import Optional import boto3 from botocore.client import Config as BotoConfig @@ -13,7 +14,11 @@ def __init__(self): self.client = boto3.client("sqs", config=config) def send_message_with_nhs_number_attr( - self, queue_url: str, message_body: str, nhs_number: str + self, + queue_url: str, + message_body: str, + nhs_number: str, + delay_second: Optional[int] = None, ): self.client.send_message( QueueUrl=queue_url, @@ -21,4 +26,5 @@ def send_message_with_nhs_number_attr( "NhsNumber": {"DataType": "String", "StringValue": nhs_number}, }, MessageBody=message_body, + DelaySeconds=delay_second, ) diff --git a/lambdas/tests/unit/models/test_bulk_upload_status.py b/lambdas/tests/unit/models/test_bulk_upload_status.py index b99545021..a7559f924 100644 --- a/lambdas/tests/unit/models/test_bulk_upload_status.py +++ b/lambdas/tests/unit/models/test_bulk_upload_status.py @@ -27,7 +27,7 @@ def test_parse_json_into_successful_upload(): expected = SuccessfulUpload( ID="719ca7a7-9c30-48f3-a472-c3daaf30d548", nhs_number="9000000009", - timestamp="1698146661", + timestamp=1698146661, date="2023-10-24", upload_status="complete", file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", @@ -43,7 +43,7 @@ def test_parse_json_into_failed_upload(): expected = FailedUpload( ID="719ca7a7-9c30-48f3-a472-c3daaf30e975", nhs_number="9000000025", - timestamp="1698109408", + timestamp=1698109408, date="2023-10-24", upload_status="failed", failure_reason="File name not matching Lloyd George naming convention", @@ -82,7 +82,7 @@ def test_ids_and_timestamp_are_auto_populated_if_not_given(mocker): ) assert upload_status.date == "2023-10-20" - assert upload_status.timestamp == "1697793900.0" + assert upload_status.timestamp == 1697793900 assert upload_status.id == "mocked_uuid" assert upload_status.upload_status == "complete" print(upload_status.model_dump()) @@ -94,6 +94,6 @@ def test_ids_and_timestamp_are_auto_populated_if_not_given(mocker): ) assert upload_status_failed.date == "2023-10-20" - assert upload_status_failed.timestamp == "1697793900.0" + assert upload_status_failed.timestamp == 1697793900 assert upload_status_failed.id == "mocked_uuid" assert upload_status_failed.upload_status == "failed" diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index 212a3bc0b..c8287f86e 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -1,13 +1,22 @@ import pytest from botocore.exceptions import ClientError from services.bulk_upload_service import BulkUploadService -from tests.unit.conftest import (MOCK_LG_BUCKET, MOCK_LG_STAGING_STORE_BUCKET, - MOCK_LG_TABLE_NAME, TEST_OBJECT_KEY) +from tests.unit.conftest import ( + MOCK_LG_BUCKET, + MOCK_LG_STAGING_STORE_BUCKET, + MOCK_LG_TABLE_NAME, + TEST_OBJECT_KEY, +) from tests.unit.helpers.data.bulk_upload.test_data import ( - TEST_DOCUMENT_REFERENCE, TEST_DOCUMENT_REFERENCE_LIST, TEST_FILE_METADATA, - TEST_NHS_NUMBER_FOR_BULK_UPLOAD, TEST_SQS_MESSAGE, - TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, TEST_STAGING_METADATA, - TEST_STAGING_METADATA_WITH_INVALID_FILENAME) + TEST_DOCUMENT_REFERENCE, + TEST_DOCUMENT_REFERENCE_LIST, + TEST_FILE_METADATA, + TEST_NHS_NUMBER_FOR_BULK_UPLOAD, + TEST_SQS_MESSAGE, + TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, + TEST_STAGING_METADATA, + TEST_STAGING_METADATA_WITH_INVALID_FILENAME, +) from utils.exceptions import InvalidMessageException from utils.lloyd_george_validator import LGInvalidFilesException @@ -19,8 +28,14 @@ def mock_uuid(mocker): yield test_uuid +@pytest.fixture +def mock_check_virus_result(mocker): + mocker.patch.object(BulkUploadService, "check_virus_result") + yield + + def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validation_passed( - set_env, mocker, mock_uuid + set_env, mocker, mock_uuid, mock_check_virus_result ): mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" @@ -62,7 +77,7 @@ def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_are_invalid def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_transfer_failed_halfway( - set_env, mocker, mock_uuid + set_env, mocker, mock_uuid, mock_check_virus_result ): mocked_rollback_transaction = mocker.patch.object( BulkUploadService, "rollback_transaction" diff --git a/lambdas/tests/unit/services/test_sqs_service.py b/lambdas/tests/unit/services/test_sqs_service.py index 2f844a384..c629b140a 100644 --- a/lambdas/tests/unit/services/test_sqs_service.py +++ b/lambdas/tests/unit/services/test_sqs_service.py @@ -31,4 +31,5 @@ def return_mock(service_name, **_kwargs): "NhsNumber": {"DataType": "String", "StringValue": TEST_NHS_NUMBER}, }, MessageBody=test_message_body, + DelaySeconds=None ) From df60d5ac24fe4554369c2080d50668e5de29b43a Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 25 Oct 2023 17:32:44 +0100 Subject: [PATCH 22/46] [PRMDR-372] Amend send_message_with_nhs_number_attr to have delay_second default as 0 instead of None --- lambdas/services/sqs_service.py | 2 +- lambdas/tests/unit/services/test_sqs_service.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/services/sqs_service.py b/lambdas/services/sqs_service.py index 5b2e99aeb..c5303befb 100644 --- a/lambdas/services/sqs_service.py +++ b/lambdas/services/sqs_service.py @@ -18,7 +18,7 @@ def send_message_with_nhs_number_attr( queue_url: str, message_body: str, nhs_number: str, - delay_second: Optional[int] = None, + delay_second: int = 0, ): self.client.send_message( QueueUrl=queue_url, diff --git a/lambdas/tests/unit/services/test_sqs_service.py b/lambdas/tests/unit/services/test_sqs_service.py index c629b140a..87a382813 100644 --- a/lambdas/tests/unit/services/test_sqs_service.py +++ b/lambdas/tests/unit/services/test_sqs_service.py @@ -31,5 +31,5 @@ def return_mock(service_name, **_kwargs): "NhsNumber": {"DataType": "String", "StringValue": TEST_NHS_NUMBER}, }, MessageBody=test_message_body, - DelaySeconds=None + DelaySeconds=0 ) From 080d5630749f1a3f2278bf804f0d4b5e7ec1d0c7 Mon Sep 17 00:00:00 2001 From: Rio Knightley Date: Thu, 26 Oct 2023 13:11:08 +0100 Subject: [PATCH 23/46] Add get object tag unit testt --- lambdas/tests/unit/services/test_s3_service.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lambdas/tests/unit/services/test_s3_service.py b/lambdas/tests/unit/services/test_s3_service.py index 208994753..d7805b750 100755 --- a/lambdas/tests/unit/services/test_s3_service.py +++ b/lambdas/tests/unit/services/test_s3_service.py @@ -155,3 +155,21 @@ def test_create_object_tag(mocker): Key=TEST_FILE_NAME, Tagging={"TagSet": [{"Key": test_tag_key, "Value": test_tag_value}]}, ) + +def test_get_tag_value(mocker): + mocker.patch("boto3.client") + service = S3Service() + mock_get_object_tag = mocker.patch.object(service.client, "get_object_tagging") + + test_tag_key = "tag_key" + test_tag_value = "tag_name" + + service.client.get_object_tagging( + Bucket=MOCK_BUCKET, + Key=TEST_FILE_NAME, + ) + + mock_get_object_tag.assert_called_once_with( + Bucket=MOCK_BUCKET, + Key=TEST_FILE_NAME, + ) From d2967779e36a3810dd261738dd9890ae314496ec Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Mon, 30 Oct 2023 10:30:41 +0000 Subject: [PATCH 24/46] [PRMDR-369] Add unhappy path test for get_tag_value, minor refactoring --- lambdas/services/s3_service.py | 11 ++-- .../tests/unit/services/test_s3_service.py | 56 ++++++++++++++++--- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/lambdas/services/s3_service.py b/lambdas/services/s3_service.py index ba8a98a69..08b571e96 100644 --- a/lambdas/services/s3_service.py +++ b/lambdas/services/s3_service.py @@ -81,16 +81,13 @@ def create_object_tag( }, ) - def get_tag_value(self, s3_bucket_name: str, file_key: str, tag_key: str): + def get_tag_value(self, s3_bucket_name: str, file_key: str, tag_key: str) -> str: response = self.client.get_object_tagging( Bucket=s3_bucket_name, Key=file_key, ) - status = None for key_value_pair in response["TagSet"]: if key_value_pair["Key"] == tag_key: - status = str(key_value_pair["Value"]) - if status: - return status - else: - raise TagNotFoundException + return key_value_pair["Value"] + + raise TagNotFoundException(f"Object {file_key} doesn't have a tag of key {tag_key}") diff --git a/lambdas/tests/unit/services/test_s3_service.py b/lambdas/tests/unit/services/test_s3_service.py index d7805b750..4dfaae7ce 100755 --- a/lambdas/tests/unit/services/test_s3_service.py +++ b/lambdas/tests/unit/services/test_s3_service.py @@ -1,6 +1,14 @@ +import pytest + from services.s3_service import S3Service -from tests.unit.conftest import (MOCK_BUCKET, TEST_FILE_KEY, TEST_FILE_NAME, - TEST_NHS_NUMBER, TEST_OBJECT_KEY) +from tests.unit.conftest import ( + MOCK_BUCKET, + TEST_FILE_KEY, + TEST_FILE_NAME, + TEST_NHS_NUMBER, + TEST_OBJECT_KEY, +) +from utils.exceptions import TagNotFoundException MOCK_PRESIGNED_POST_RESPONSE = { "url": "https://ndr-dev-document-store.s3.amazonaws.com/", @@ -156,20 +164,52 @@ def test_create_object_tag(mocker): Tagging={"TagSet": [{"Key": test_tag_key, "Value": test_tag_value}]}, ) + def test_get_tag_value(mocker): mocker.patch("boto3.client") service = S3Service() - mock_get_object_tag = mocker.patch.object(service.client, "get_object_tagging") - test_tag_key = "tag_key" test_tag_value = "tag_name" - service.client.get_object_tagging( - Bucket=MOCK_BUCKET, - Key=TEST_FILE_NAME, - ) + mock_response = { + "VersionId": "mock_version", + "TagSet": [ + {"Key": test_tag_key, "Value": test_tag_value}, + {"Key": "some_other_unrelated_tag", "Value": "abcd1234"}, + ], + } + + mock_get_object_tag = mocker.patch.object( + service.client, "get_object_tagging", return_value=mock_response + ) + + actual = service.get_tag_value( + s3_bucket_name=MOCK_BUCKET, file_key=TEST_FILE_NAME, tag_key=test_tag_key + ) + expected = test_tag_value + assert actual == expected mock_get_object_tag.assert_called_once_with( Bucket=MOCK_BUCKET, Key=TEST_FILE_NAME, ) + + +def test_get_tag_value_raise_error_when_object_dont_have_tag(mocker): + mocker.patch("boto3.client") + service = S3Service() + test_tag_key = "tag_key" + + mock_response = { + "VersionId": "mock_version", + "TagSet": [], + } + + mocker.patch.object( + service.client, "get_object_tagging", return_value=mock_response + ) + + with pytest.raises(TagNotFoundException): + service.get_tag_value( + s3_bucket_name=MOCK_BUCKET, file_key=TEST_FILE_NAME, tag_key=test_tag_key + ) From 2162fc01d568f95a0f7a06b3a1b774e1c8b3953d Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Mon, 30 Oct 2023 11:00:24 +0000 Subject: [PATCH 25/46] [PRMDR-369] Replace literal strings with enum / constants for virus scan result. Amend logic of check_virus_result to fit ticket description --- lambdas/enums/virus_scan_result.py | 12 +++++++ lambdas/services/bulk_upload_service.py | 31 +++++++++++-------- .../tests/unit/services/test_s3_service.py | 6 ++-- 3 files changed, 34 insertions(+), 15 deletions(-) create mode 100644 lambdas/enums/virus_scan_result.py diff --git a/lambdas/enums/virus_scan_result.py b/lambdas/enums/virus_scan_result.py new file mode 100644 index 000000000..416aee1a9 --- /dev/null +++ b/lambdas/enums/virus_scan_result.py @@ -0,0 +1,12 @@ +from enum import Enum + + +class VirusScanResult(Enum): + CLEAN = "Clean" + INFECTED = "Infected" + INFECTED_ALLOWED = "InfectedAllowed" + UNSCANNABLE = "Unscannable" + ERROR = "Error" + + +SCAN_RESULT_TAG_KEY = 'scan-result' diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 9dbb630a9..92761ff38 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -4,6 +4,7 @@ import pydantic from botocore.exceptions import ClientError from enums.metadata_field_names import DocumentReferenceMetadataFields +from enums.virus_scan_result import SCAN_RESULT_TAG_KEY, VirusScanResult from models.bulk_upload_status import FailedUpload, SuccessfulUpload from models.nhs_document_reference import NHSDocumentReference from models.staging_metadata import MetadataFile, StagingMetadata @@ -72,12 +73,16 @@ def handle_sqs_message(self, message: dict): logger.info("Running validation for virus scan results...") self.check_virus_result(staging_metadata) except VirusNoResultException as e: + logger.info(e) logger.info( f"Waiting on virus scan results for: {staging_metadata.nhs_number}, adding message back to queue" ) + self.put_message_back_to_queue(staging_metadata) logger.info("Will stop processing Lloyd George record for this patient") return + except VirusFailedException as e: + logger.info(e) logger.info( f"Virus scan results check failed for: {staging_metadata.nhs_number}, removing from queue" ) @@ -125,26 +130,26 @@ def check_virus_result(self, staging_metadata: StagingMetadata): source_file_key = self.strip_leading_slash(file_metadata.file_path) try: scan_result = self.s3_service.get_tag_value( - self.staging_bucket_name, source_file_key, "scan-result" + self.staging_bucket_name, source_file_key, SCAN_RESULT_TAG_KEY ) - if scan_result == "Clean": + if scan_result == VirusScanResult.CLEAN: continue - elif scan_result == "Infected": - logger.info( - f"Found infected document(s) for scanId: {file_metadata.scan_id}" + elif scan_result == VirusScanResult.INFECTED: + raise VirusFailedException( + f"Found infected document filepath: {file_metadata.file_path}" ) - raise VirusFailedException else: - logger.info( - f"Found no virus scan results for scanId: {file_metadata.scan_id}" + # handle cases other than Clean or Infected e.g. Unscannable, Error + raise VirusFailedException( + f"Failed to scan document filepath: {file_metadata.file_path}, scan result was {scan_result}" ) - self.put_message_back_to_queue(staging_metadata) except TagNotFoundException: - logger.info( - f"Found no virus scan results for scanId: {file_metadata.scan_id}" - ) - self.put_message_back_to_queue(staging_metadata) + raise VirusNoResultException(f"Virus scan result not found for document: {file_metadata.file_path}") + + logger.info( + f"Verified that all documents for patient {staging_metadata.nhs_number} are clean." + ) def put_message_back_to_queue(self, staging_metadata: StagingMetadata): sqs_service = SQSService() diff --git a/lambdas/tests/unit/services/test_s3_service.py b/lambdas/tests/unit/services/test_s3_service.py index 4dfaae7ce..644e70f5e 100755 --- a/lambdas/tests/unit/services/test_s3_service.py +++ b/lambdas/tests/unit/services/test_s3_service.py @@ -195,14 +195,16 @@ def test_get_tag_value(mocker): ) -def test_get_tag_value_raise_error_when_object_dont_have_tag(mocker): +def test_get_tag_value_raise_error_when_object_dont_have_the_specific_tag(mocker): mocker.patch("boto3.client") service = S3Service() test_tag_key = "tag_key" mock_response = { "VersionId": "mock_version", - "TagSet": [], + "TagSet": [ + {"Key": "some_other_unrelated_tag", "Value": "abcd1234"}, + ], } mocker.patch.object( From 0f44e814c1e53fdb82bb41eee89b515819205b03 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Mon, 30 Oct 2023 12:06:12 +0000 Subject: [PATCH 26/46] [PRMDR-369] Add unit tests for check_virus_result, add logic to handle the case when required file not exist in S3 --- lambdas/services/bulk_upload_service.py | 27 +++-- .../unit/services/test_bulk_upload_service.py | 98 ++++++++++++++++++- lambdas/utils/exceptions.py | 12 ++- 3 files changed, 126 insertions(+), 11 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 92761ff38..0e38104bb 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -13,9 +13,11 @@ from services.sqs_service import SQSService from utils.exceptions import ( InvalidMessageException, - VirusFailedException, - VirusNoResultException, TagNotFoundException, + DocumentInfectedException, + VirusScanFailedException, + VirusScanNoResultException, + S3FileNotFoundException, ) from utils.lloyd_george_validator import LGInvalidFilesException, validate_lg_file_names from utils.utilities import create_reference_id @@ -128,6 +130,7 @@ def validate_files(self, staging_metadata: StagingMetadata): def check_virus_result(self, staging_metadata: StagingMetadata): for file_metadata in staging_metadata.files: source_file_key = self.strip_leading_slash(file_metadata.file_path) + file_path = file_metadata.file_path try: scan_result = self.s3_service.get_tag_value( self.staging_bucket_name, source_file_key, SCAN_RESULT_TAG_KEY @@ -135,17 +138,25 @@ def check_virus_result(self, staging_metadata: StagingMetadata): if scan_result == VirusScanResult.CLEAN: continue elif scan_result == VirusScanResult.INFECTED: - raise VirusFailedException( - f"Found infected document filepath: {file_metadata.file_path}" + raise DocumentInfectedException( + f"Found infected document filepath: {file_path}" ) else: # handle cases other than Clean or Infected e.g. Unscannable, Error - raise VirusFailedException( - f"Failed to scan document filepath: {file_metadata.file_path}, scan result was {scan_result}" + raise VirusScanFailedException( + f"Failed to scan document filepath: {file_path}, scan result was {scan_result}" ) - except TagNotFoundException: - raise VirusNoResultException(f"Virus scan result not found for document: {file_metadata.file_path}") + raise VirusScanNoResultException( + f"Virus scan result not found for document: {file_path}" + ) + except ClientError as e: + if "AccessDenied" in str(e) or "NoSuchKey" in str(e): + logger.info(f"Failed to check object tag for given file_path: {file_path}") + logger.info("file_path may be incorrect or contain invalid character") + raise S3FileNotFoundException(f"Failed to access file {file_path}") + else: + raise e logger.info( f"Verified that all documents for patient {staging_metadata.nhs_number} are clean." diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index c8287f86e..8ca75a608 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -1,5 +1,7 @@ import pytest from botocore.exceptions import ClientError + +from enums.virus_scan_result import VirusScanResult from services.bulk_upload_service import BulkUploadService from tests.unit.conftest import ( MOCK_LG_BUCKET, @@ -17,7 +19,14 @@ TEST_STAGING_METADATA, TEST_STAGING_METADATA_WITH_INVALID_FILENAME, ) -from utils.exceptions import InvalidMessageException +from utils.exceptions import ( + InvalidMessageException, + TagNotFoundException, + VirusScanNoResultException, + DocumentInfectedException, + VirusScanFailedException, + S3FileNotFoundException, +) from utils.lloyd_george_validator import LGInvalidFilesException @@ -126,6 +135,93 @@ def test_validate_files_raise_LGInvalidFilesException_when_file_names_invalid(se service.validate_files(TEST_STAGING_METADATA_WITH_INVALID_FILENAME) +def test_check_virus_result_raise_no_error_when_all_files_are_clean( + set_env, mocker, caplog +): + service = BulkUploadService() + service.s3_service = mocker.MagicMock() + service.s3_service.get_tag_value.return_value = VirusScanResult.CLEAN + + service.check_virus_result(TEST_STAGING_METADATA) + + assert ( + caplog.records[-1].message + == f"Verified that all documents for patient {TEST_STAGING_METADATA.nhs_number} are clean." + ) + + +def test_check_virus_result_raise_VirusScanNoResultException_when_one_file_not_scanned( + set_env, mocker +): + service = BulkUploadService() + service.s3_service = mocker.MagicMock() + + service.s3_service.get_tag_value.side_effect = [ + VirusScanResult.CLEAN, + VirusScanResult.CLEAN, + TagNotFoundException, # the 3rd file is not scanned yet + ] + + with pytest.raises(VirusScanNoResultException): + service.check_virus_result(TEST_STAGING_METADATA) + + +def test_check_virus_result_raise_DocumentInfectedException_when_one_file_was_infected( + set_env, mocker +): + service = BulkUploadService() + service.s3_service = mocker.MagicMock() + + service.s3_service.get_tag_value.side_effect = [ + VirusScanResult.CLEAN, + VirusScanResult.INFECTED, + VirusScanResult.CLEAN, + ] + + with pytest.raises(DocumentInfectedException): + service.check_virus_result(TEST_STAGING_METADATA) + + +def test_check_virus_result_raise_S3FileNotFoundException_when_one_file_not_exist_in_bucket( + set_env, mocker +): + service = BulkUploadService() + service.s3_service = mocker.MagicMock() + + mock_s3_exception = ClientError( + {"Error": {"Code": "AccessDenied", "Message": "Access Denied"}}, + "GetObjectTagging", + ) + + service.s3_service.get_tag_value.side_effect = [ + VirusScanResult.CLEAN, + VirusScanResult.CLEAN, + mock_s3_exception, + ] + + with pytest.raises(S3FileNotFoundException): + service.check_virus_result(TEST_STAGING_METADATA) + + +def test_check_virus_result_raise_VirusScanFailedException_for_special_cases( + set_env, mocker +): + service = BulkUploadService() + service.s3_service = mocker.MagicMock() + + cases_to_test_for = [ + VirusScanResult.UNSCANNABLE, + VirusScanResult.ERROR, + VirusScanResult.INFECTED_ALLOWED, + "some_other_unexpected_value", + ] + + for tag_value in cases_to_test_for: + service.s3_service.get_tag_value.return_value = tag_value + with pytest.raises(VirusScanFailedException): + service.check_virus_result(TEST_STAGING_METADATA) + + def test_create_lg_records_and_copy_files(set_env, mocker, mock_uuid): service = BulkUploadService() service.s3_service = mocker.MagicMock() diff --git a/lambdas/utils/exceptions.py b/lambdas/utils/exceptions.py index f78595b24..54f10a90a 100644 --- a/lambdas/utils/exceptions.py +++ b/lambdas/utils/exceptions.py @@ -56,11 +56,19 @@ class InvalidDocumentReferenceException(Exception): pass -class VirusNoResultException(Exception): +class VirusScanNoResultException(Exception): pass -class VirusFailedException(Exception): +class DocumentInfectedException(Exception): + pass + + +class VirusScanFailedException(Exception): + pass + + +class S3FileNotFoundException(Exception): pass From a0d96a43e9d0327dc101df55a7b8bc6ddcd55a3f Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Mon, 30 Oct 2023 12:26:17 +0000 Subject: [PATCH 27/46] [PRMDR-372] Amend mock test data of Bulk Upload to fit default mock patient of mock pds service --- lambdas/services/bulk_upload_service.py | 5 ++--- .../helpers/data/bulk_upload/test_data.py | 10 +++++----- .../unit/services/test_bulk_upload_service.py | 19 +++++++++++++++++-- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 0e38104bb..7a6828b0b 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -74,7 +74,7 @@ def handle_sqs_message(self, message: dict): try: logger.info("Running validation for virus scan results...") self.check_virus_result(staging_metadata) - except VirusNoResultException as e: + except VirusScanNoResultException as e: logger.info(e) logger.info( f"Waiting on virus scan results for: {staging_metadata.nhs_number}, adding message back to queue" @@ -82,8 +82,7 @@ def handle_sqs_message(self, message: dict): self.put_message_back_to_queue(staging_metadata) logger.info("Will stop processing Lloyd George record for this patient") return - - except VirusFailedException as e: + except (VirusScanFailedException, DocumentInfectedException) as e: logger.info(e) logger.info( f"Virus scan results check failed for: {staging_metadata.nhs_number}, removing from queue" diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py b/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py index 86079e09b..4c1970c75 100644 --- a/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py +++ b/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py @@ -66,14 +66,14 @@ def readfile(filename: str) -> str: ) -def make_valid_lg_file_names(total_number: int, nhs_number: str = "1234567890"): +def make_valid_lg_file_names(total_number: int, nhs_number: str = "9000000009"): return [ - f"{i}of{total_number}_Lloyd_George_Record_[Joe Bloggs]_[{nhs_number}]_[25-12-2019].pdf" + f"{i}of{total_number}_Lloyd_George_Record_[Jane Smith]_[{nhs_number}]_[22-10-2010].pdf" for i in range(1, total_number + 1) ] -def build_test_staging_metadata(file_names: list[str], nhs_number: str = "1234567890"): +def build_test_staging_metadata(file_names: list[str], nhs_number: str = "9000000009"): files = [] for file_name in file_names: source_file_path = f"/{nhs_number}/{file_name}" @@ -93,7 +93,7 @@ def build_test_sqs_message(staging_metadata: StagingMetadata): } -def build_test_document_reference(file_name: str, nhs_number: str = "1234567890"): +def build_test_document_reference(file_name: str, nhs_number: str = "9000000009"): return NHSDocumentReference( nhs_number=nhs_number, content_type="application/pdf", @@ -103,7 +103,7 @@ def build_test_document_reference(file_name: str, nhs_number: str = "1234567890" ) -TEST_NHS_NUMBER_FOR_BULK_UPLOAD = "1234567890" +TEST_NHS_NUMBER_FOR_BULK_UPLOAD = "9000000009" TEST_STAGING_METADATA = build_test_staging_metadata(make_valid_lg_file_names(3)) TEST_SQS_MESSAGE = build_test_sqs_message(TEST_STAGING_METADATA) TEST_FILE_METADATA = TEST_STAGING_METADATA.files[0] diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index 8ca75a608..f7103eafb 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -39,8 +39,7 @@ def mock_uuid(mocker): @pytest.fixture def mock_check_virus_result(mocker): - mocker.patch.object(BulkUploadService, "check_virus_result") - yield + yield mocker.patch.object(BulkUploadService, "check_virus_result") def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validation_passed( @@ -85,6 +84,22 @@ def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_are_invalid ) +def test_test_handle_sqs_message_report_failure_when_document_infected( + set_env, mocker, mock_uuid, mock_check_virus_result +): + mock_check_virus_result.side_effect = DocumentInfectedException + mocked_report_upload_failure = mocker.patch.object( + BulkUploadService, "report_upload_failure" + ) + + service = BulkUploadService() + service.handle_sqs_message(message=TEST_SQS_MESSAGE) + + mocked_report_upload_failure.assert_called_with( + TEST_STAGING_METADATA, "One or more of the files failed virus scanner check" + ) + + def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_transfer_failed_halfway( set_env, mocker, mock_uuid, mock_check_virus_result ): From a4902d5d917f12e9bacf92a2fb27cfbd9d0fbb9a Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Mon, 30 Oct 2023 17:52:22 +0000 Subject: [PATCH 28/46] [PRMDR-372] Add unit tests for bulk upload service --- lambdas/services/bulk_upload_service.py | 36 +++-- lambdas/services/sqs_service.py | 4 +- .../helpers/data/bulk_upload/test_data.py | 2 +- .../unit/services/test_bulk_upload_service.py | 132 ++++++++++++++++-- 4 files changed, 149 insertions(+), 25 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 7a6828b0b..9d53349dd 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -80,7 +80,6 @@ def handle_sqs_message(self, message: dict): f"Waiting on virus scan results for: {staging_metadata.nhs_number}, adding message back to queue" ) self.put_message_back_to_queue(staging_metadata) - logger.info("Will stop processing Lloyd George record for this patient") return except (VirusScanFailedException, DocumentInfectedException) as e: logger.info(e) @@ -89,8 +88,21 @@ def handle_sqs_message(self, message: dict): ) logger.info("Will stop processing Lloyd George record for this patient") - failure_reason = "One or more of the files failed virus scanner check" - self.report_upload_failure(staging_metadata, failure_reason) + self.report_upload_failure( + staging_metadata, "One or more of the files failed virus scanner check" + ) + return + except S3FileNotFoundException as e: + logger.info(e) + logger.info( + f"One or more of the files is not accessible from S3 bucket for patient {staging_metadata.nhs_number}" + ) + logger.info("Will stop processing Lloyd George record for this patient") + + self.report_upload_failure( + staging_metadata, + "One or more of the files is not accessible from staging bucket", + ) return logger.info( @@ -151,8 +163,12 @@ def check_virus_result(self, staging_metadata: StagingMetadata): ) except ClientError as e: if "AccessDenied" in str(e) or "NoSuchKey" in str(e): - logger.info(f"Failed to check object tag for given file_path: {file_path}") - logger.info("file_path may be incorrect or contain invalid character") + logger.info( + f"Failed to check object tag for given file_path: {file_path}" + ) + logger.info( + "file_path may be incorrect or contain invalid character" + ) raise S3FileNotFoundException(f"Failed to access file {file_path}") else: raise e @@ -162,16 +178,12 @@ def check_virus_result(self, staging_metadata: StagingMetadata): ) def put_message_back_to_queue(self, staging_metadata: StagingMetadata): - sqs_service = SQSService() - nhs_number = staging_metadata.nhs_number - - sqs_service.send_message_with_nhs_number_attr( + self.sqs_service.send_message_with_nhs_number_attr( queue_url=self.metadata_queue_url, message_body=staging_metadata.model_dump_json(by_alias=True), - nhs_number=nhs_number, - delay_second=60 * 5, + nhs_number=staging_metadata.nhs_number, + delay_seconds=60 * 5, ) - raise VirusNoResultException def init_transaction(self): self.dynamo_records_in_transaction = [] diff --git a/lambdas/services/sqs_service.py b/lambdas/services/sqs_service.py index c5303befb..8cb4d007f 100644 --- a/lambdas/services/sqs_service.py +++ b/lambdas/services/sqs_service.py @@ -18,7 +18,7 @@ def send_message_with_nhs_number_attr( queue_url: str, message_body: str, nhs_number: str, - delay_second: int = 0, + delay_seconds: int = 0, ): self.client.send_message( QueueUrl=queue_url, @@ -26,5 +26,5 @@ def send_message_with_nhs_number_attr( "NhsNumber": {"DataType": "String", "StringValue": nhs_number}, }, MessageBody=message_body, - DelaySeconds=delay_second, + DelaySeconds=delay_seconds, ) diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py b/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py index 4c1970c75..e9e9f2707 100644 --- a/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py +++ b/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py @@ -129,4 +129,4 @@ def build_test_document_reference(file_name: str, nhs_number: str = "9000000009" TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, TEST_SQS_MESSAGE, ] -} +} \ No newline at end of file diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index f7103eafb..d5ca7f960 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -1,5 +1,6 @@ import pytest from botocore.exceptions import ClientError +from freezegun import freeze_time from enums.virus_scan_result import VirusScanResult from services.bulk_upload_service import BulkUploadService @@ -8,6 +9,8 @@ MOCK_LG_STAGING_STORE_BUCKET, MOCK_LG_TABLE_NAME, TEST_OBJECT_KEY, + MOCK_LG_METADATA_SQS_QUEUE, + MOCK_BULK_UPLOAD_DYNAMODB, ) from tests.unit.helpers.data.bulk_upload.test_data import ( TEST_DOCUMENT_REFERENCE, @@ -42,8 +45,13 @@ def mock_check_virus_result(mocker): yield mocker.patch.object(BulkUploadService, "check_virus_result") +@pytest.fixture +def mock_validate_files(mocker): + yield mocker.patch.object(BulkUploadService, "validate_files") + + def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validation_passed( - set_env, mocker, mock_uuid, mock_check_virus_result + set_env, mocker, mock_uuid, mock_check_virus_result, mock_validate_files ): mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" @@ -52,7 +60,7 @@ def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validati BulkUploadService, "report_upload_complete" ) - mocker.patch.object(BulkUploadService, "validate_files", return_value=None) + mock_validate_files.return_value = None service = BulkUploadService() service.handle_sqs_message(message=TEST_SQS_MESSAGE) @@ -62,42 +70,86 @@ def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validati def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_are_invalid( - set_env, mocker, mock_uuid + set_env, mocker, mock_uuid, mock_validate_files ): mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) - mocked_report_upload_failure = mocker.patch.object( + mock_report_upload_failure = mocker.patch.object( BulkUploadService, "report_upload_failure" ) mocked_error = LGInvalidFilesException( "One or more of the files do not match naming convention" ) - mocker.patch.object(BulkUploadService, "validate_files", side_effect=mocked_error) + mock_validate_files.side_effect = mocked_error service = BulkUploadService() service.handle_sqs_message(message=TEST_SQS_MESSAGE_WITH_INVALID_FILENAME) mock_create_lg_records_and_copy_files.assert_not_called() - mocked_report_upload_failure.assert_called_with( + mock_report_upload_failure.assert_called_with( TEST_STAGING_METADATA_WITH_INVALID_FILENAME, str(mocked_error) ) -def test_test_handle_sqs_message_report_failure_when_document_infected( - set_env, mocker, mock_uuid, mock_check_virus_result +def test_handle_sqs_message_report_failure_when_document_is_infected( + set_env, mocker, mock_uuid, mock_validate_files, mock_check_virus_result ): mock_check_virus_result.side_effect = DocumentInfectedException - mocked_report_upload_failure = mocker.patch.object( + mock_report_upload_failure = mocker.patch.object( BulkUploadService, "report_upload_failure" ) + mock_create_lg_records_and_copy_files = mocker.patch.object( + BulkUploadService, "create_lg_records_and_copy_files" + ) service = BulkUploadService() service.handle_sqs_message(message=TEST_SQS_MESSAGE) - mocked_report_upload_failure.assert_called_with( + mock_report_upload_failure.assert_called_with( TEST_STAGING_METADATA, "One or more of the files failed virus scanner check" ) + mock_create_lg_records_and_copy_files.assert_not_called() + + +def test_handle_sqs_message_report_failure_when_document_not_exist( + set_env, mocker, mock_uuid, mock_validate_files, mock_check_virus_result +): + mock_check_virus_result.side_effect = S3FileNotFoundException + mock_report_upload_failure = mocker.patch.object( + BulkUploadService, "report_upload_failure" + ) + + service = BulkUploadService() + service.handle_sqs_message(message=TEST_SQS_MESSAGE) + + mock_report_upload_failure.assert_called_with( + TEST_STAGING_METADATA, + "One or more of the files is not accessible from staging bucket", + ) + + +def test_handle_sqs_message_put_message_back_to_queue_when_virus_scan_result_not_available( + set_env, mocker, mock_uuid, mock_validate_files, mock_check_virus_result +): + mock_check_virus_result.side_effect = VirusScanNoResultException + mock_report_upload_failure = mocker.patch.object( + BulkUploadService, "report_upload_failure" + ) + mock_create_lg_records_and_copy_files = mocker.patch.object( + BulkUploadService, "create_lg_records_and_copy_files" + ) + mock_put_message_back_to_queue = mocker.patch.object( + BulkUploadService, "put_message_back_to_queue" + ) + + service = BulkUploadService() + service.handle_sqs_message(message=TEST_SQS_MESSAGE) + + mock_put_message_back_to_queue.assert_called_with(TEST_STAGING_METADATA) + + mock_report_upload_failure.assert_not_called() + mock_create_lg_records_and_copy_files.assert_not_called() def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_transfer_failed_halfway( @@ -237,6 +289,20 @@ def test_check_virus_result_raise_VirusScanFailedException_for_special_cases( service.check_virus_result(TEST_STAGING_METADATA) +def test_put_message_back_to_queue(set_env, mocker): + service = BulkUploadService() + service.sqs_service = mocker.MagicMock() + + service.put_message_back_to_queue(TEST_STAGING_METADATA) + + service.sqs_service.send_message_with_nhs_number_attr.assert_called_with( + queue_url=MOCK_LG_METADATA_SQS_QUEUE, + message_body=TEST_STAGING_METADATA.model_dump_json(by_alias=True), + nhs_number=TEST_STAGING_METADATA.nhs_number, + delay_seconds=60 * 5, + ) + + def test_create_lg_records_and_copy_files(set_env, mocker, mock_uuid): service = BulkUploadService() service.s3_service = mocker.MagicMock() @@ -310,3 +376,49 @@ def test_rollback_transaction(set_env, mocker, mock_uuid): file_key=f"{TEST_NHS_NUMBER_FOR_BULK_UPLOAD}/mock_uuid_2", ) assert service.s3_service.delete_object.call_count == 2 + + +@freeze_time("2023-10-1 13:00:00") +def test_report_upload_complete_add_record_to_dynamodb(set_env, mocker, mock_uuid): + service = BulkUploadService() + service.dynamo_service = mocker.MagicMock() + + service.report_upload_complete(TEST_STAGING_METADATA) + + assert service.dynamo_service.create_item.call_count == len(TEST_STAGING_METADATA.files) + + for file in TEST_STAGING_METADATA.files: + expected_dynamo_db_record = { + "Date": "2023-10-01", + "FilePath": file.file_path, + "ID": mock_uuid, + "NhsNumber": TEST_STAGING_METADATA.nhs_number, + "Timestamp": 1696165200, + "UploadStatus": "complete", + } + service.dynamo_service.create_item.assert_any_call( + item=expected_dynamo_db_record, table_name=MOCK_BULK_UPLOAD_DYNAMODB + ) + + +@freeze_time("2023-10-2 13:00:00") +def test_report_upload_failure_add_record_to_dynamodb(set_env, mocker, mock_uuid): + service = BulkUploadService() + service.dynamo_service = mocker.MagicMock() + + mock_failure_reason = "File name invalid" + service.report_upload_failure(TEST_STAGING_METADATA, failure_reason=mock_failure_reason) + + for file in TEST_STAGING_METADATA.files: + expected_dynamo_db_record = { + "Date": "2023-10-02", + "FilePath": file.file_path, + "ID": mock_uuid, + "NhsNumber": TEST_STAGING_METADATA.nhs_number, + "Timestamp": 1696251600, + "UploadStatus": "failed", + "FailureReason": mock_failure_reason + } + service.dynamo_service.create_item.assert_any_call( + item=expected_dynamo_db_record, table_name=MOCK_BULK_UPLOAD_DYNAMODB + ) From 67f120e9fbb6ef5bce46a9ae97a847752c60e0a7 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Mon, 30 Oct 2023 17:53:19 +0000 Subject: [PATCH 29/46] run formatter --- lambdas/enums/virus_scan_result.py | 2 +- lambdas/services/bulk_upload_service.py | 15 +++--- lambdas/services/s3_service.py | 5 +- lambdas/services/sqs_service.py | 1 - .../helpers/data/bulk_upload/test_data.py | 2 +- .../unit/services/test_bulk_upload_service.py | 50 ++++++++----------- .../unit/services/test_document_service.py | 3 +- .../tests/unit/services/test_s3_service.py | 10 +--- .../tests/unit/services/test_sqs_service.py | 2 +- .../decorators/test_validate_document_type.py | 4 +- 10 files changed, 36 insertions(+), 58 deletions(-) diff --git a/lambdas/enums/virus_scan_result.py b/lambdas/enums/virus_scan_result.py index 416aee1a9..6ee0c5ebd 100644 --- a/lambdas/enums/virus_scan_result.py +++ b/lambdas/enums/virus_scan_result.py @@ -9,4 +9,4 @@ class VirusScanResult(Enum): ERROR = "Error" -SCAN_RESULT_TAG_KEY = 'scan-result' +SCAN_RESULT_TAG_KEY = "scan-result" diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 9d53349dd..955f64a56 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -11,15 +11,12 @@ from services.dynamo_service import DynamoDBService from services.s3_service import S3Service from services.sqs_service import SQSService -from utils.exceptions import ( - InvalidMessageException, - TagNotFoundException, - DocumentInfectedException, - VirusScanFailedException, - VirusScanNoResultException, - S3FileNotFoundException, -) -from utils.lloyd_george_validator import LGInvalidFilesException, validate_lg_file_names +from utils.exceptions import (DocumentInfectedException, + InvalidMessageException, S3FileNotFoundException, + TagNotFoundException, VirusScanFailedException, + VirusScanNoResultException) +from utils.lloyd_george_validator import (LGInvalidFilesException, + validate_lg_file_names) from utils.utilities import create_reference_id logger = logging.getLogger() diff --git a/lambdas/services/s3_service.py b/lambdas/services/s3_service.py index 08b571e96..b466d30d5 100644 --- a/lambdas/services/s3_service.py +++ b/lambdas/services/s3_service.py @@ -3,7 +3,6 @@ import boto3 from botocore.client import Config as BotoConfig - from utils.exceptions import TagNotFoundException logger = logging.getLogger() @@ -90,4 +89,6 @@ def get_tag_value(self, s3_bucket_name: str, file_key: str, tag_key: str) -> str if key_value_pair["Key"] == tag_key: return key_value_pair["Value"] - raise TagNotFoundException(f"Object {file_key} doesn't have a tag of key {tag_key}") + raise TagNotFoundException( + f"Object {file_key} doesn't have a tag of key {tag_key}" + ) diff --git a/lambdas/services/sqs_service.py b/lambdas/services/sqs_service.py index 8cb4d007f..846fa5c15 100644 --- a/lambdas/services/sqs_service.py +++ b/lambdas/services/sqs_service.py @@ -1,5 +1,4 @@ import logging -from typing import Optional import boto3 from botocore.client import Config as BotoConfig diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py b/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py index e9e9f2707..4c1970c75 100644 --- a/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py +++ b/lambdas/tests/unit/helpers/data/bulk_upload/test_data.py @@ -129,4 +129,4 @@ def build_test_document_reference(file_name: str, nhs_number: str = "9000000009" TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, TEST_SQS_MESSAGE, ] -} \ No newline at end of file +} diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index d5ca7f960..a099c200c 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -1,35 +1,21 @@ import pytest from botocore.exceptions import ClientError -from freezegun import freeze_time - from enums.virus_scan_result import VirusScanResult +from freezegun import freeze_time from services.bulk_upload_service import BulkUploadService -from tests.unit.conftest import ( - MOCK_LG_BUCKET, - MOCK_LG_STAGING_STORE_BUCKET, - MOCK_LG_TABLE_NAME, - TEST_OBJECT_KEY, - MOCK_LG_METADATA_SQS_QUEUE, - MOCK_BULK_UPLOAD_DYNAMODB, -) +from tests.unit.conftest import (MOCK_BULK_UPLOAD_DYNAMODB, MOCK_LG_BUCKET, + MOCK_LG_METADATA_SQS_QUEUE, + MOCK_LG_STAGING_STORE_BUCKET, + MOCK_LG_TABLE_NAME, TEST_OBJECT_KEY) from tests.unit.helpers.data.bulk_upload.test_data import ( - TEST_DOCUMENT_REFERENCE, - TEST_DOCUMENT_REFERENCE_LIST, - TEST_FILE_METADATA, - TEST_NHS_NUMBER_FOR_BULK_UPLOAD, - TEST_SQS_MESSAGE, - TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, - TEST_STAGING_METADATA, - TEST_STAGING_METADATA_WITH_INVALID_FILENAME, -) -from utils.exceptions import ( - InvalidMessageException, - TagNotFoundException, - VirusScanNoResultException, - DocumentInfectedException, - VirusScanFailedException, - S3FileNotFoundException, -) + TEST_DOCUMENT_REFERENCE, TEST_DOCUMENT_REFERENCE_LIST, TEST_FILE_METADATA, + TEST_NHS_NUMBER_FOR_BULK_UPLOAD, TEST_SQS_MESSAGE, + TEST_SQS_MESSAGE_WITH_INVALID_FILENAME, TEST_STAGING_METADATA, + TEST_STAGING_METADATA_WITH_INVALID_FILENAME) +from utils.exceptions import (DocumentInfectedException, + InvalidMessageException, S3FileNotFoundException, + TagNotFoundException, VirusScanFailedException, + VirusScanNoResultException) from utils.lloyd_george_validator import LGInvalidFilesException @@ -385,7 +371,9 @@ def test_report_upload_complete_add_record_to_dynamodb(set_env, mocker, mock_uui service.report_upload_complete(TEST_STAGING_METADATA) - assert service.dynamo_service.create_item.call_count == len(TEST_STAGING_METADATA.files) + assert service.dynamo_service.create_item.call_count == len( + TEST_STAGING_METADATA.files + ) for file in TEST_STAGING_METADATA.files: expected_dynamo_db_record = { @@ -407,7 +395,9 @@ def test_report_upload_failure_add_record_to_dynamodb(set_env, mocker, mock_uuid service.dynamo_service = mocker.MagicMock() mock_failure_reason = "File name invalid" - service.report_upload_failure(TEST_STAGING_METADATA, failure_reason=mock_failure_reason) + service.report_upload_failure( + TEST_STAGING_METADATA, failure_reason=mock_failure_reason + ) for file in TEST_STAGING_METADATA.files: expected_dynamo_db_record = { @@ -417,7 +407,7 @@ def test_report_upload_failure_add_record_to_dynamodb(set_env, mocker, mock_uuid "NhsNumber": TEST_STAGING_METADATA.nhs_number, "Timestamp": 1696251600, "UploadStatus": "failed", - "FailureReason": mock_failure_reason + "FailureReason": mock_failure_reason, } service.dynamo_service.create_item.assert_any_call( item=expected_dynamo_db_record, table_name=MOCK_BULK_UPLOAD_DYNAMODB diff --git a/lambdas/tests/unit/services/test_document_service.py b/lambdas/tests/unit/services/test_document_service.py index 92680d935..006a99b9f 100644 --- a/lambdas/tests/unit/services/test_document_service.py +++ b/lambdas/tests/unit/services/test_document_service.py @@ -4,11 +4,10 @@ import boto3 import pytest from models.document_reference import DocumentReference +from services.document_service import DocumentService from tests.unit.helpers.data.dynamo_responses import (MOCK_EMPTY_RESPONSE, MOCK_SEARCH_RESPONSE) -from services.document_service import DocumentService - @pytest.fixture def nhs_number(): diff --git a/lambdas/tests/unit/services/test_s3_service.py b/lambdas/tests/unit/services/test_s3_service.py index 644e70f5e..222195ca7 100755 --- a/lambdas/tests/unit/services/test_s3_service.py +++ b/lambdas/tests/unit/services/test_s3_service.py @@ -1,13 +1,7 @@ import pytest - from services.s3_service import S3Service -from tests.unit.conftest import ( - MOCK_BUCKET, - TEST_FILE_KEY, - TEST_FILE_NAME, - TEST_NHS_NUMBER, - TEST_OBJECT_KEY, -) +from tests.unit.conftest import (MOCK_BUCKET, TEST_FILE_KEY, TEST_FILE_NAME, + TEST_NHS_NUMBER, TEST_OBJECT_KEY) from utils.exceptions import TagNotFoundException MOCK_PRESIGNED_POST_RESPONSE = { diff --git a/lambdas/tests/unit/services/test_sqs_service.py b/lambdas/tests/unit/services/test_sqs_service.py index 87a382813..220a38258 100644 --- a/lambdas/tests/unit/services/test_sqs_service.py +++ b/lambdas/tests/unit/services/test_sqs_service.py @@ -31,5 +31,5 @@ def return_mock(service_name, **_kwargs): "NhsNumber": {"DataType": "String", "StringValue": TEST_NHS_NUMBER}, }, MessageBody=test_message_body, - DelaySeconds=0 + DelaySeconds=0, ) diff --git a/lambdas/tests/unit/utils/decorators/test_validate_document_type.py b/lambdas/tests/unit/utils/decorators/test_validate_document_type.py index 7c830c2ef..a40abd65d 100755 --- a/lambdas/tests/unit/utils/decorators/test_validate_document_type.py +++ b/lambdas/tests/unit/utils/decorators/test_validate_document_type.py @@ -1,8 +1,6 @@ +from utils.decorators.validate_document_type import validate_document_type from utils.lambda_response import ApiGatewayResponse -from utils.decorators.validate_document_type import \ - validate_document_type - @validate_document_type def lambda_handler(event, context): From 0c05172732216d1b77141342b3c136849ccad663 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 09:45:22 +0000 Subject: [PATCH 30/46] [PRMDR-372] Remove redundant handle_invalid_message method --- lambdas/handlers/bulk_upload_handler.py | 34 ++--------- .../unit/handlers/test_bulk_upload_handler.py | 57 +------------------ 2 files changed, 7 insertions(+), 84 deletions(-) diff --git a/lambdas/handlers/bulk_upload_handler.py b/lambdas/handlers/bulk_upload_handler.py index 6ba5447d1..ce19c526a 100644 --- a/lambdas/handlers/bulk_upload_handler.py +++ b/lambdas/handlers/bulk_upload_handler.py @@ -1,11 +1,6 @@ -import json import logging -import os from services.bulk_upload_service import BulkUploadService -from services.sqs_service import SQSService -from utils.exceptions import InvalidMessageException -from utils.lloyd_george_validator import LGInvalidFilesException logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -23,28 +18,7 @@ def lambda_handler(event, _context): try: logger.info(f"Processing message {index} of {len(event['Records'])}") bulk_upload_service.handle_sqs_message(message) - except (InvalidMessageException, LGInvalidFilesException) as error: - handle_invalid_message(invalid_message=message, error=error) - - -def handle_invalid_message(invalid_message: dict, error=None): - # Currently we just send the invalid message to invalid queue. - # In future ticket, will change this to record errors in dynamo db - invalid_queue_url = os.environ["INVALID_SQS_QUEUE_URL"] - sqs_service = SQSService() - - new_message = {"original_message": invalid_message["body"]} - if error: - new_message["error"] = str(error) - - try: - nhs_number = invalid_message["messageAttributes"]["NhsNumber"]["stringValue"] - except KeyError: - nhs_number = "" - - sqs_service.send_message_with_nhs_number_attr( - queue_url=invalid_queue_url, - message_body=json.dumps(new_message), - nhs_number=nhs_number, - ) - logger.info(f"Sent message to invalid queue: {invalid_message}") + except Exception as error: + logger.info(f"Fail to process current message due to error: {error}") + logger.info("Continue on next message") + logger.info(f"Finished processing all {len(event['Records'])} messages") diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py index 945ad7e7b..8a149e7b0 100644 --- a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py +++ b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py @@ -1,11 +1,6 @@ -import json -from unittest.mock import ANY - import pytest -from handlers.bulk_upload_handler import handle_invalid_message, lambda_handler -from tests.unit.conftest import MOCK_LG_INVALID_SQS_QUEUE -from tests.unit.helpers.data.bulk_upload.test_data import ( - TEST_EVENT_WITH_SQS_MESSAGES, TEST_NHS_NUMBER_FOR_BULK_UPLOAD) +from handlers.bulk_upload_handler import lambda_handler +from tests.unit.helpers.data.bulk_upload.test_data import TEST_EVENT_WITH_SQS_MESSAGES from utils.lloyd_george_validator import LGInvalidFilesException @@ -26,28 +21,8 @@ def test_lambda_process_each_sqs_message_one_by_one(set_env, mocked_service): mocked_service.handle_sqs_message.assert_any_call(message) -def test_lambda_calls_handle_invalid_message_when_validation_failed( - set_env, mocker, mocked_service -): - # emulate that validation error happen at 2nd message - mocked_service.handle_sqs_message.side_effect = [ - None, - LGInvalidFilesException, - None, - ] - mocked_handle_invalid_message = mocker.patch( - "handlers.bulk_upload_handler.handle_invalid_message" - ) - - lambda_handler(TEST_EVENT_WITH_SQS_MESSAGES, None) - - mocked_handle_invalid_message.assert_called_once_with( - invalid_message=TEST_EVENT_WITH_SQS_MESSAGES["Records"][1], error=ANY - ) - - def test_lambda_continue_process_next_message_after_handled_validation_error( - set_env, mocker, mocked_service + set_env, mocked_service ): # emulate that validation error happen at 2nd message mocked_service.handle_sqs_message.side_effect = [ @@ -55,35 +30,9 @@ def test_lambda_continue_process_next_message_after_handled_validation_error( LGInvalidFilesException, None, ] - mocker.patch("handlers.bulk_upload_handler.handle_invalid_message") lambda_handler(TEST_EVENT_WITH_SQS_MESSAGES, None) mocked_service.handle_sqs_message.assert_called_with( TEST_EVENT_WITH_SQS_MESSAGES["Records"][2] ) - - -def test_handle_invalid_message( - set_env, - mocker, -): - mocked_sqs_service = mocker.patch( - "handlers.bulk_upload_handler.SQSService" - ).return_value - - handle_invalid_message( - TEST_EVENT_WITH_SQS_MESSAGES["Records"][0], - error=LGInvalidFilesException("filename incorrect"), - ) - - expected_message_body = { - "original_message": TEST_EVENT_WITH_SQS_MESSAGES["Records"][0]["body"], - "error": str(LGInvalidFilesException("filename incorrect")), - } - - mocked_sqs_service.send_message_with_nhs_number_attr.assert_called_with( - queue_url=MOCK_LG_INVALID_SQS_QUEUE, - message_body=json.dumps(expected_message_body), - nhs_number=TEST_NHS_NUMBER_FOR_BULK_UPLOAD, - ) From 37c7e47ab2321e3953888e0bd722e13568205bbe Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 09:51:49 +0000 Subject: [PATCH 31/46] run formatter --- .../handlers/bulk_upload_report_handler.py | 3 +- .../unit/handlers/test_bulk_upload_handler.py | 3 +- .../test_bulk_upload_report_handler.py | 29 +++++++------------ 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/lambdas/handlers/bulk_upload_report_handler.py b/lambdas/handlers/bulk_upload_report_handler.py index f9a5a7f1e..22a233560 100644 --- a/lambdas/handlers/bulk_upload_report_handler.py +++ b/lambdas/handlers/bulk_upload_report_handler.py @@ -5,7 +5,6 @@ from boto3.dynamodb.conditions import Attr from botocore.exceptions import ClientError - from services.dynamo_service import DynamoDBService from services.s3_service import S3Service from utils.decorators.ensure_env_var import ensure_environment_variables @@ -62,7 +61,7 @@ def get_dynamodb_report_items( bulk_upload_table_name, filter_expression=filter_time ) - if not "Items" in db_response: + if "Items" not in db_response: return None items = db_response["Items"] while "LastEvaluatedKey" in db_response: diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py index 8a149e7b0..8ec29fab4 100644 --- a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py +++ b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py @@ -1,6 +1,7 @@ import pytest from handlers.bulk_upload_handler import lambda_handler -from tests.unit.helpers.data.bulk_upload.test_data import TEST_EVENT_WITH_SQS_MESSAGES +from tests.unit.helpers.data.bulk_upload.test_data import \ + TEST_EVENT_WITH_SQS_MESSAGES from utils.lloyd_george_validator import LGInvalidFilesException diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py index 76120b0dc..858bdff53 100644 --- a/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py +++ b/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py @@ -5,25 +5,17 @@ from boto3.dynamodb.conditions import Attr from freezegun import freeze_time - -from handlers.bulk_upload_report_handler import ( - get_times_for_scan, - write_items_to_csv, - get_dynamodb_report_items, - report_handler, - write_empty_report -) +from handlers.bulk_upload_report_handler import (get_dynamodb_report_items, + get_times_for_scan, + report_handler, + write_empty_report, + write_items_to_csv) +from tests.unit.conftest import (MOCK_BULK_REPORT_TABLE_NAME, + MOCK_LG_STAGING_STORE_BUCKET) from tests.unit.helpers.data.dynamo_scan_response import ( - MOCK_RESPONSE, - EXPECTED_RESPONSE, - MOCK_RESPONSE_WITH_LAST_KEY, - MOCK_EMPTY_RESPONSE, - UNEXPECTED_RESPONSE, -) -from tests.unit.conftest import ( - MOCK_BULK_REPORT_TABLE_NAME, - MOCK_LG_STAGING_STORE_BUCKET, -) + EXPECTED_RESPONSE, MOCK_EMPTY_RESPONSE, MOCK_RESPONSE, + MOCK_RESPONSE_WITH_LAST_KEY, UNEXPECTED_RESPONSE) + @freeze_time("2012-01-14 7:20:01") def test_get_time_for_scan_after_7am(): @@ -70,6 +62,7 @@ def test_write_items_to_csv(): assert row == item os.remove("test_file") + def test_write_empty_file_to_txt(): write_empty_report("test_file") From b7e19330143b832d657c954685c7db6911fdca82 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 11:35:32 +0000 Subject: [PATCH 32/46] [PRMDR-372] Amend test of bulk upload status, use freezegun for patching datetime --- lambdas/models/bulk_upload_status.py | 23 +--- .../unit/models/test_bulk_upload_status.py | 114 ++++++++---------- .../unit/services/test_bulk_upload_service.py | 27 +++-- 3 files changed, 67 insertions(+), 97 deletions(-) diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py index e65395a0b..060e5cf30 100644 --- a/lambdas/models/bulk_upload_status.py +++ b/lambdas/models/bulk_upload_status.py @@ -1,8 +1,8 @@ from datetime import datetime -from typing import List, Literal, TypeAlias, Union +from typing import Literal from models.config import to_capwords -from pydantic import BaseModel, ConfigDict, Field, TypeAdapter +from pydantic import BaseModel, ConfigDict, Field from utils.utilities import create_reference_id @@ -10,8 +10,8 @@ class UploadStatusBaseClass(BaseModel): model_config = ConfigDict(alias_generator=to_capwords, populate_by_name=True) id: str = Field(alias="ID", default_factory=create_reference_id) nhs_number: str - timestamp: int = Field(default_factory=lambda: int(now().timestamp())) - date: str = Field(default_factory=lambda: date_as_string(now())) + timestamp: int = Field(default_factory=lambda: int(datetime.now().timestamp())) + date: str = Field(default_factory=lambda: date_string_yyyymmdd(datetime.now())) file_path: str @@ -24,18 +24,5 @@ class FailedUpload(UploadStatusBaseClass): failure_reason: str -UploadStatus = TypeAdapter(Union[SuccessfulUpload, FailedUpload]) - -UploadStatusListType: TypeAlias = List[Union[SuccessfulUpload, FailedUpload]] -UploadStatusList = TypeAdapter(UploadStatusListType) - -FieldsToReport = [] - - -def now() -> datetime: - """Helper func for easier mocking, as datetime.now is immutable""" - return datetime.now() - - -def date_as_string(time_now: datetime) -> str: +def date_string_yyyymmdd(time_now: datetime) -> str: return time_now.strftime("%Y-%m-%d") diff --git a/lambdas/tests/unit/models/test_bulk_upload_status.py b/lambdas/tests/unit/models/test_bulk_upload_status.py index a7559f924..4cbd42034 100644 --- a/lambdas/tests/unit/models/test_bulk_upload_status.py +++ b/lambdas/tests/unit/models/test_bulk_upload_status.py @@ -1,99 +1,79 @@ -from datetime import datetime - -from models.bulk_upload_status import (FailedUpload, SuccessfulUpload, - UploadStatus, UploadStatusList) +from freezegun import freeze_time +from models.bulk_upload_status import FailedUpload, SuccessfulUpload +from tests.unit.conftest import TEST_OBJECT_KEY MOCK_DATA_COMPLETE_UPLOAD = { - "ID": "719ca7a7-9c30-48f3-a472-c3daaf30d548", + "ID": TEST_OBJECT_KEY, "NhsNumber": "9000000009", - "Timestamp": "1698146661", - "Date": "2023-10-24", + "Timestamp": 1698661500, + "Date": "2023-10-30", "UploadStatus": "complete", - "FilePath": "/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", + "FilePath": "/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019].pdf", } +MOCK_FAILURE_REASON = "File name not matching Lloyd George naming convention" MOCK_DATA_FAILED_UPLOAD = { - "ID": "719ca7a7-9c30-48f3-a472-c3daaf30e975", + "ID": TEST_OBJECT_KEY, "NhsNumber": "9000000025", - "Timestamp": "1698109408", - "Date": "2023-10-24", + "Timestamp": 1698661500, + "Date": "2023-10-30", "UploadStatus": "failed", - "FailureReason": "File name not matching Lloyd George naming convention", + "FailureReason": MOCK_FAILURE_REASON, "FilePath": "/9000000025/invalid_filename.pdf", } -def test_parse_json_into_successful_upload(): - expected = SuccessfulUpload( - ID="719ca7a7-9c30-48f3-a472-c3daaf30d548", +def test_create_successful_upload(): + expected = MOCK_DATA_COMPLETE_UPLOAD + actual = SuccessfulUpload( + ID=TEST_OBJECT_KEY, nhs_number="9000000009", - timestamp=1698146661, - date="2023-10-24", + timestamp=1698661500, + date="2023-10-30", upload_status="complete", - file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", - ) - - actual = UploadStatus.validate_python(MOCK_DATA_COMPLETE_UPLOAD) + file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019].pdf", + ).model_dump(by_alias=True) - assert isinstance(actual, SuccessfulUpload) assert actual == expected -def test_parse_json_into_failed_upload(): - expected = FailedUpload( - ID="719ca7a7-9c30-48f3-a472-c3daaf30e975", +def test_create_failed_upload(): + expected = MOCK_DATA_FAILED_UPLOAD + actual = FailedUpload( + ID=TEST_OBJECT_KEY, nhs_number="9000000025", - timestamp=1698109408, - date="2023-10-24", + timestamp=1698661500, + date="2023-10-30", upload_status="failed", - failure_reason="File name not matching Lloyd George naming convention", + failure_reason=MOCK_FAILURE_REASON, file_path="/9000000025/invalid_filename.pdf", - ) - - actual = UploadStatus.validate_python(MOCK_DATA_FAILED_UPLOAD) + ).model_dump(by_alias=True) - assert isinstance(actual, FailedUpload) assert actual == expected -def test_parsing_a_list_of_record(): - items = [ - MOCK_DATA_COMPLETE_UPLOAD, - MOCK_DATA_FAILED_UPLOAD, - MOCK_DATA_COMPLETE_UPLOAD, - ] +@freeze_time("2023-10-30 10:25:00") +def test_successful_upload_ids_and_timestamp_are_auto_populated_if_not_given(mocker): + mocker.patch("uuid.uuid4", return_value=TEST_OBJECT_KEY) - actual = UploadStatusList.validate_python(items) + expected = MOCK_DATA_COMPLETE_UPLOAD + actual = SuccessfulUpload( + nhs_number="9000000009", + file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019].pdf", + ).model_dump(by_alias=True) - assert isinstance(actual[0], SuccessfulUpload) - assert isinstance(actual[1], FailedUpload) - assert isinstance(actual[2], SuccessfulUpload) + assert actual == expected -def test_ids_and_timestamp_are_auto_populated_if_not_given(mocker): - mocker.patch( - "models.bulk_upload_status.now", return_value=datetime(2023, 10, 20, 10, 25) - ) - mocker.patch("uuid.uuid4", return_value="mocked_uuid") +@freeze_time("2023-10-30 10:25:00") +def test_failed_upload_ids_and_timestamp_are_auto_populated_if_not_given(mocker): + mocker.patch("uuid.uuid4", return_value=TEST_OBJECT_KEY) - upload_status = SuccessfulUpload( - nhs_number="9000000009", - file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", - ) - - assert upload_status.date == "2023-10-20" - assert upload_status.timestamp == 1697793900 - assert upload_status.id == "mocked_uuid" - assert upload_status.upload_status == "complete" - print(upload_status.model_dump()) + expected = MOCK_DATA_FAILED_UPLOAD + actual = FailedUpload( + nhs_number="9000000025", + file_path="/9000000025/invalid_filename.pdf", + failure_reason=MOCK_FAILURE_REASON, + ).model_dump(by_alias=True) - upload_status_failed = FailedUpload( - nhs_number="9000000009", - failure_reason="File name not matching Lloyd George name convention", - file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019]", - ) - - assert upload_status_failed.date == "2023-10-20" - assert upload_status_failed.timestamp == 1697793900 - assert upload_status_failed.id == "mocked_uuid" - assert upload_status_failed.upload_status == "failed" + assert actual == expected diff --git a/lambdas/tests/unit/services/test_bulk_upload_service.py b/lambdas/tests/unit/services/test_bulk_upload_service.py index 3cf0ffc7b..8f8ceedd4 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_service.py @@ -36,7 +36,7 @@ def mock_validate_files(mocker): yield mocker.patch.object(BulkUploadService, "validate_files") -def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validation_passed( +def test_handle_sqs_message_calls_create_lg_records_and_copy_files_for_happy_path( set_env, mocker, mock_uuid, mock_check_virus_result, mock_validate_files ): mock_create_lg_records_and_copy_files = mocker.patch.object( @@ -47,6 +47,7 @@ def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validati ) mock_validate_files.return_value = None + mock_check_virus_result.return_value = None service = BulkUploadService() service.handle_sqs_message(message=TEST_SQS_MESSAGE) @@ -55,7 +56,7 @@ def test_handle_sqs_message_calls_create_lg_records_and_copy_files_when_validati mock_report_upload_complete.assert_called() -def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_are_invalid( +def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_name_invalid( set_env, mocker, mock_uuid, mock_validate_files ): mock_create_lg_records_and_copy_files = mocker.patch.object( @@ -81,13 +82,13 @@ def test_handle_sqs_message_calls_report_upload_failure_when_lg_file_are_invalid def test_handle_sqs_message_report_failure_when_document_is_infected( set_env, mocker, mock_uuid, mock_validate_files, mock_check_virus_result ): - mock_check_virus_result.side_effect = DocumentInfectedException mock_report_upload_failure = mocker.patch.object( BulkUploadService, "report_upload_failure" ) mock_create_lg_records_and_copy_files = mocker.patch.object( BulkUploadService, "create_lg_records_and_copy_files" ) + mock_check_virus_result.side_effect = DocumentInfectedException service = BulkUploadService() service.handle_sqs_message(message=TEST_SQS_MESSAGE) @@ -141,10 +142,10 @@ def test_handle_sqs_message_put_message_back_to_queue_when_virus_scan_result_not def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_transfer_failed_halfway( set_env, mocker, mock_uuid, mock_check_virus_result ): - mocked_rollback_transaction = mocker.patch.object( + mock_rollback_transaction = mocker.patch.object( BulkUploadService, "rollback_transaction" ) - mocked_report_upload_failure = mocker.patch.object( + mock_report_upload_failure = mocker.patch.object( BulkUploadService, "report_upload_failure" ) service = BulkUploadService() @@ -153,7 +154,7 @@ def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_t service.validate_files = mocker.MagicMock() mock_client_error = ClientError( - {"Error": {"Code": "404", "Message": "Object not found in bucket"}}, + {"Error": {"Code": "AccessDenied", "Message": "Access Denied"}}, "GetObject", ) @@ -162,8 +163,11 @@ def test_handle_sqs_message_rollback_transaction_when_validation_pass_but_file_t service.handle_sqs_message(message=TEST_SQS_MESSAGE) - mocked_rollback_transaction.assert_called() - mocked_report_upload_failure.assert_called() + mock_rollback_transaction.assert_called() + mock_report_upload_failure.assert_called_with( + TEST_STAGING_METADATA, + "Validation passed but error occurred during file transfer", + ) def test_handle_sqs_message_raise_InvalidMessageException_when_failed_to_extract_data_from_message( @@ -197,10 +201,9 @@ def test_check_virus_result_raise_no_error_when_all_files_are_clean( service.check_virus_result(TEST_STAGING_METADATA) - assert ( - caplog.records[-1].message - == f"Verified that all documents for patient {TEST_STAGING_METADATA.nhs_number} are clean." - ) + expected_log = f"Verified that all documents for patient {TEST_STAGING_METADATA.nhs_number} are clean." + actual_log = caplog.records[-1].message + assert actual_log == expected_log def test_check_virus_result_raise_VirusScanNoResultException_when_one_file_not_scanned( From 77ac50a92b1f019b7492a6f9fdf56d5c6333ac08 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 12:23:00 +0000 Subject: [PATCH 33/46] [PRMDR-369] Fix incorrect comparison of Enum and str --- lambdas/enums/virus_scan_result.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/enums/virus_scan_result.py b/lambdas/enums/virus_scan_result.py index 6ee0c5ebd..484d9e902 100644 --- a/lambdas/enums/virus_scan_result.py +++ b/lambdas/enums/virus_scan_result.py @@ -1,7 +1,7 @@ -from enum import Enum +from enum import StrEnum -class VirusScanResult(Enum): +class VirusScanResult(StrEnum): CLEAN = "Clean" INFECTED = "Infected" INFECTED_ALLOWED = "InfectedAllowed" From 821055d3c4c382ab9b0abed9cdd57c50c02265f1 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 12:48:35 +0000 Subject: [PATCH 34/46] [PRMDR-372] Amend LG Validator to correctly report the cases for too few files and too many files --- .../unit/utils/test_lloyd_george_validator.py | 38 +++++++++++++++++-- lambdas/utils/lloyd_george_validator.py | 15 +++++--- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index 784ea2c56..447acd09a 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -5,10 +5,15 @@ from requests import Response from tests.unit.helpers.data.pds.pds_patient_response import PDS_PATIENT from utils.lloyd_george_validator import ( - LGInvalidFilesException, check_for_duplicate_files, + LGInvalidFilesException, + check_for_duplicate_files, check_for_file_names_agrees_with_each_other, - check_for_number_of_files_match_expected, extract_info_from_filename, - validate_file_name, validate_lg_file_type, validate_with_pds_service) + check_for_number_of_files_match_expected, + extract_info_from_filename, + validate_file_name, + validate_lg_file_type, + validate_with_pds_service, +) def test_catching_error_when_file_type_not_pdf(): @@ -72,12 +77,37 @@ def test_files_without_duplication(): def test_files_list_with_missing_files(): - with pytest.raises(LGInvalidFilesException): + with pytest.raises(LGInvalidFilesException) as e: lg_file_list = [ "1of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", "2of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", ] check_for_number_of_files_match_expected(lg_file_list[0], len(lg_file_list)) + assert str(e.value) == "There are missing file(s) in the request" + + +def test_files_list_with_too_many_files(): + with pytest.raises(LGInvalidFilesException) as e: + lg_file_list = [ + "1of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", + "2of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", + "3of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", + "1of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", + ] + check_for_number_of_files_match_expected(lg_file_list[0], len(lg_file_list)) + assert str(e.value) == "There are more files than the total number in file name" + +def test_files_list_with_invalid_name(): + with pytest.raises(LGInvalidFilesException) as e: + lg_file_list = [ + "invalid_file_name", + "2of4_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", + "3of4_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", + "4of4_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf", + ] + check_for_number_of_files_match_expected(lg_file_list[0], len(lg_file_list)) + assert str(e.value) == "One or more of the files do not match naming convention" + def test_files_without_missing_files(): diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 18102076b..62f490a9c 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -42,11 +42,16 @@ def check_for_duplicate_files(file_list: list[str]): def check_for_number_of_files_match_expected(file_name: str, total_files_number: int): lg_number_regex = "of[0-9]+" - expected_number_of_files = re.search(lg_number_regex, file_name) - if expected_number_of_files and not expected_number_of_files.group()[2:] == str( - total_files_number - ): - raise LGInvalidFilesException("There are missing file(s) in the request") + regex_match_result = re.search(lg_number_regex, file_name) + try: + expected_number_of_files = int(regex_match_result.group()[2:]) + if total_files_number < expected_number_of_files : + raise LGInvalidFilesException("There are missing file(s) in the request") + elif total_files_number > expected_number_of_files: + raise LGInvalidFilesException("There are more files than the total number in file name") + except (AttributeError, IndexError, ValueError): + raise LGInvalidFilesException("One or more of the files do not match naming convention") + def validate_lg_files(file_list: list[NHSDocumentReference]): From 736bed45dca696c7c55c1ec77b36f27557098438 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 13:05:24 +0000 Subject: [PATCH 35/46] [PRMDR-372] Amend LG Validator to give proper failure reason on PDS related errors --- .../unit/utils/test_lloyd_george_validator.py | 8 +++++--- lambdas/utils/lloyd_george_validator.py | 20 ++++++++++++++----- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index 447acd09a..3e4f33dcb 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -147,7 +147,7 @@ def test_files_for_different_patients(): ] with pytest.raises(LGInvalidFilesException) as e: check_for_file_names_agrees_with_each_other(lg_file_list) - assert e == LGInvalidFilesException("File names does not match with each other") + assert str(e.value) == "File names does not match with each other" def test_validate_nhs_id_with_pds_service(mocker, mock_pds_call): @@ -251,8 +251,9 @@ def test_patient_not_found_with_pds_service(mocker, mock_pds_call): mock_pds_call.return_value = response mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") - with pytest.raises(LGInvalidFilesException): + with pytest.raises(LGInvalidFilesException) as e: validate_with_pds_service(lg_file_list, "9000000009") + assert str(e.value) == "Could not find the given patient on PDS" mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_not_called() @@ -268,8 +269,9 @@ def test_bad_request_with_pds_service(mocker, mock_pds_call): mock_pds_call.return_value = response mock_odc_code = mocker.patch("utils.lloyd_george_validator.get_user_ods_code") - with pytest.raises(LGInvalidFilesException): + with pytest.raises(LGInvalidFilesException) as e: validate_with_pds_service(lg_file_list, "9000000009") + assert str(e.value) == "Failed to retrieve patient data from PDS" mock_pds_call.assert_called_with(nhs_number="9000000009", retry_on_expired=True) mock_odc_code.assert_not_called() diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 62f490a9c..b50971225 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -45,13 +45,16 @@ def check_for_number_of_files_match_expected(file_name: str, total_files_number: regex_match_result = re.search(lg_number_regex, file_name) try: expected_number_of_files = int(regex_match_result.group()[2:]) - if total_files_number < expected_number_of_files : + if total_files_number < expected_number_of_files: raise LGInvalidFilesException("There are missing file(s) in the request") elif total_files_number > expected_number_of_files: - raise LGInvalidFilesException("There are more files than the total number in file name") + raise LGInvalidFilesException( + "There are more files than the total number in file name" + ) except (AttributeError, IndexError, ValueError): - raise LGInvalidFilesException("One or more of the files do not match naming convention") - + raise LGInvalidFilesException( + "One or more of the files do not match naming convention" + ) def validate_lg_files(file_list: list[NHSDocumentReference]): @@ -138,9 +141,16 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): if patient_details.general_practice_ods != current_user_ods: raise LGInvalidFilesException("User is not allowed to access patient") - except (HTTPError, ValidationError, ClientError, ValueError) as e: + except (ValidationError, ClientError, ValueError) as e: logger.error(e) raise LGInvalidFilesException(e) + except HTTPError as e: + logger.error(e) + if "404" in str(e): + raise LGInvalidFilesException("Could not find the given patient on PDS") + else: + raise LGInvalidFilesException("Failed to retrieve patient data from PDS") + def get_user_ods_code(): From 8ac65f9c0943bf7b8b1ca1389d375f94e9ba119a Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 13:17:03 +0000 Subject: [PATCH 36/46] [PRMDR-369] Replace base classs StrEnum with (str, Enum) as github action still using python 3.10 --- lambdas/enums/virus_scan_result.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/enums/virus_scan_result.py b/lambdas/enums/virus_scan_result.py index 484d9e902..959cf4da9 100644 --- a/lambdas/enums/virus_scan_result.py +++ b/lambdas/enums/virus_scan_result.py @@ -1,7 +1,7 @@ -from enum import StrEnum +from enum import Enum -class VirusScanResult(StrEnum): +class VirusScanResult(str, Enum): CLEAN = "Clean" INFECTED = "Infected" INFECTED_ALLOWED = "InfectedAllowed" From 1b105760f575d8f6cf690f0154e4d6e9716d9f86 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 14:14:21 +0000 Subject: [PATCH 37/46] [PRMDR-372] Undo unintended change to non related code --- app/src/helpers/requests/getPresignedUrlForZip.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/helpers/requests/getPresignedUrlForZip.ts b/app/src/helpers/requests/getPresignedUrlForZip.ts index 555d2d56e..b197612f0 100644 --- a/app/src/helpers/requests/getPresignedUrlForZip.ts +++ b/app/src/helpers/requests/getPresignedUrlForZip.ts @@ -7,7 +7,7 @@ type Args = { nhsNumber: string; baseUrl: string; baseHeaders: AuthHeaders; - docType: DOCUMENT_TYPE | Array; + docType: DOCUMENT_TYPE; }; type GetPresignedUrl = { From 61fc9f9ad13618ff323ce291601b973619f4711e Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 14:29:03 +0000 Subject: [PATCH 38/46] [PRMDR-372] minor change on wordings --- lambdas/services/bulk_upload_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 955f64a56..3a559a7d4 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -147,12 +147,12 @@ def check_virus_result(self, staging_metadata: StagingMetadata): continue elif scan_result == VirusScanResult.INFECTED: raise DocumentInfectedException( - f"Found infected document filepath: {file_path}" + f"Found infected document: {file_path}" ) else: # handle cases other than Clean or Infected e.g. Unscannable, Error raise VirusScanFailedException( - f"Failed to scan document filepath: {file_path}, scan result was {scan_result}" + f"Failed to scan document: {file_path}, scan result was {scan_result}" ) except TagNotFoundException: raise VirusScanNoResultException( From db46577f169058652886a80227775666691fb8d7 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 15:39:49 +0000 Subject: [PATCH 39/46] [PRMDR-372] Fix bulk upload report lambda possible failure due related to fieldnames issue --- lambdas/handlers/bulk_upload_report_handler.py | 8 ++++---- lambdas/models/bulk_upload_status.py | 11 +++++++++++ .../handlers/test_bulk_upload_report_handler.py | 14 ++++++++------ .../bulk_upload/expected_bulk_upload_report.csv | 3 +++ .../unit/utils/test_lloyd_george_validator.py | 13 ++++--------- lambdas/utils/lloyd_george_validator.py | 1 - 6 files changed, 30 insertions(+), 20 deletions(-) create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_report.csv diff --git a/lambdas/handlers/bulk_upload_report_handler.py b/lambdas/handlers/bulk_upload_report_handler.py index 22a233560..f8815a4c6 100644 --- a/lambdas/handlers/bulk_upload_report_handler.py +++ b/lambdas/handlers/bulk_upload_report_handler.py @@ -2,9 +2,11 @@ import datetime import logging import os +from typing import Optional from boto3.dynamodb.conditions import Attr from botocore.exceptions import ClientError +from models.bulk_upload_status import FieldNamesForBulkUploadReport from services.dynamo_service import DynamoDBService from services.s3_service import S3Service from utils.decorators.ensure_env_var import ensure_environment_variables @@ -51,7 +53,7 @@ def report_handler(db_service, s3_service): def get_dynamodb_report_items( db_service, start_timestamp: int, end_timestamp: int -) -> None or list: +) -> Optional[list]: logger.info("Starting Scan on DynamoDB table") bulk_upload_table_name = os.getenv("BULK_UPLOAD_DYNAMODB_NAME") filter_time = Attr("Timestamp").gt(start_timestamp) & Attr("Timestamp").lt( @@ -78,12 +80,11 @@ def get_dynamodb_report_items( def write_items_to_csv(items: list, csv_file_path: str): logger.info("Writing scan results to csv file") with open(csv_file_path, "w") as output_file: - field_names = items[0].keys() + field_names = FieldNamesForBulkUploadReport dict_writer_object = csv.DictWriter(output_file, fieldnames=field_names) dict_writer_object.writeheader() for item in items: dict_writer_object.writerow(item) - output_file.close() def get_times_for_scan(): @@ -100,4 +101,3 @@ def get_times_for_scan(): def write_empty_report(file_path: str): with open(file_path, "w") as output_file: output_file.write("No data was found for this timeframe") - output_file.close() diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py index 060e5cf30..6be107990 100644 --- a/lambdas/models/bulk_upload_status.py +++ b/lambdas/models/bulk_upload_status.py @@ -24,5 +24,16 @@ class FailedUpload(UploadStatusBaseClass): failure_reason: str +FieldNamesForBulkUploadReport = [ + "NhsNumber", + "UploadStatus", + "FailureReason", + "FilePath", + "Date", + "Timestamp", + "ID", +] + + def date_string_yyyymmdd(time_now: datetime) -> str: return time_now.strftime("%Y-%m-%d") diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py index 858bdff53..298431102 100644 --- a/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py +++ b/lambdas/tests/unit/handlers/test_bulk_upload_report_handler.py @@ -1,4 +1,3 @@ -import csv import os from datetime import datetime from unittest.mock import call @@ -12,9 +11,12 @@ write_items_to_csv) from tests.unit.conftest import (MOCK_BULK_REPORT_TABLE_NAME, MOCK_LG_STAGING_STORE_BUCKET) +from tests.unit.helpers.data.bulk_upload.test_data import readfile from tests.unit.helpers.data.dynamo_scan_response import ( EXPECTED_RESPONSE, MOCK_EMPTY_RESPONSE, MOCK_RESPONSE, MOCK_RESPONSE_WITH_LAST_KEY, UNEXPECTED_RESPONSE) +from tests.unit.models.test_bulk_upload_status import ( + MOCK_DATA_COMPLETE_UPLOAD, MOCK_DATA_FAILED_UPLOAD) @freeze_time("2012-01-14 7:20:01") @@ -51,15 +53,15 @@ def test_get_time_for_scan_at_7am(): def test_write_items_to_csv(): - items = [{"id": "1", "key": "value"}, {"id": "2", "key": "value"}] + items = [MOCK_DATA_COMPLETE_UPLOAD, MOCK_DATA_FAILED_UPLOAD] write_items_to_csv(items, "test_file") + expected = readfile("expected_bulk_upload_report.csv") + with open("test_file") as test_file: - csv_reader = csv.DictReader(test_file, delimiter=",") - assert csv_reader.fieldnames == list(items[0].keys()) - for row, item in zip(csv_reader, items): - assert row == item + actual = test_file.read() + assert expected == actual os.remove("test_file") diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_report.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_report.csv new file mode 100644 index 000000000..37cdd477a --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_report.csv @@ -0,0 +1,3 @@ +NhsNumber,UploadStatus,FailureReason,FilePath,Date,Timestamp,ID +9000000009,complete,,/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019].pdf,2023-10-30,1698661500,1234-4567-8912-HSDF-TEST +9000000025,failed,File name not matching Lloyd George naming convention,/9000000025/invalid_filename.pdf,2023-10-30,1698661500,1234-4567-8912-HSDF-TEST diff --git a/lambdas/tests/unit/utils/test_lloyd_george_validator.py b/lambdas/tests/unit/utils/test_lloyd_george_validator.py index 3e4f33dcb..c965cd687 100644 --- a/lambdas/tests/unit/utils/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/utils/test_lloyd_george_validator.py @@ -5,15 +5,10 @@ from requests import Response from tests.unit.helpers.data.pds.pds_patient_response import PDS_PATIENT from utils.lloyd_george_validator import ( - LGInvalidFilesException, - check_for_duplicate_files, + LGInvalidFilesException, check_for_duplicate_files, check_for_file_names_agrees_with_each_other, - check_for_number_of_files_match_expected, - extract_info_from_filename, - validate_file_name, - validate_lg_file_type, - validate_with_pds_service, -) + check_for_number_of_files_match_expected, extract_info_from_filename, + validate_file_name, validate_lg_file_type, validate_with_pds_service) def test_catching_error_when_file_type_not_pdf(): @@ -97,6 +92,7 @@ def test_files_list_with_too_many_files(): check_for_number_of_files_match_expected(lg_file_list[0], len(lg_file_list)) assert str(e.value) == "There are more files than the total number in file name" + def test_files_list_with_invalid_name(): with pytest.raises(LGInvalidFilesException) as e: lg_file_list = [ @@ -109,7 +105,6 @@ def test_files_list_with_invalid_name(): assert str(e.value) == "One or more of the files do not match naming convention" - def test_files_without_missing_files(): try: lg_file_list = [ diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index b50971225..f950dc9ba 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -152,7 +152,6 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): raise LGInvalidFilesException("Failed to retrieve patient data from PDS") - def get_user_ods_code(): ssm_service = SSMService() return ssm_service.get_ssm_parameter(SSMParameter.GP_ODS_CODE.value) From 4d7da3add661a5fefdab91cac7a3594fdb7e3543 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Tue, 31 Oct 2023 15:45:07 +0000 Subject: [PATCH 40/46] [PRMDR-372] Undo unintended change to scss file by linter --- app/src/styles/App.scss | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/src/styles/App.scss b/app/src/styles/App.scss index db3a5de17..8ee3ac627 100644 --- a/app/src/styles/App.scss +++ b/app/src/styles/App.scss @@ -94,9 +94,7 @@ } &:focus { background-color: #ffeb3b; - box-shadow: - 0 -2px #ffeb3b, - 0 4px #212b32; + box-shadow: 0 -2px #ffeb3b, 0 4px #212b32; color: #212b32; outline: 4px solid transparent; text-decoration: none; From 7e10532f4ae4d752c7cd87dd79a3c7c6b916d655 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 1 Nov 2023 11:08:29 +0000 Subject: [PATCH 41/46] [PRMDR-372] patch get_user_ods_code to return mock ODS code in sandboxes --- lambdas/handlers/search_patient_details_handler.py | 12 +----------- lambdas/services/mock_data/pds_patient.json | 2 +- lambdas/utils/lloyd_george_validator.py | 8 ++++++-- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/lambdas/handlers/search_patient_details_handler.py b/lambdas/handlers/search_patient_details_handler.py index 0cf7a6792..d151f268d 100644 --- a/lambdas/handlers/search_patient_details_handler.py +++ b/lambdas/handlers/search_patient_details_handler.py @@ -1,28 +1,18 @@ import logging -import os from json import JSONDecodeError from pydantic import ValidationError -from services.mock_pds_service import MockPdsApiService -from services.pds_api_service import PdsApiService from services.ssm_service import SSMService from utils.decorators.validate_patient_id import validate_patient_id from utils.exceptions import (InvalidResourceIdException, PatientNotFoundException, PdsErrorException) from utils.lambda_response import ApiGatewayResponse +from utils.utilities import get_pds_service logger = logging.getLogger() logger.setLevel(logging.INFO) -def get_pds_service(): - return ( - PdsApiService - if (os.getenv("PDS_FHIR_IS_STUBBED") == "false") - else MockPdsApiService - ) - - @validate_patient_id def lambda_handler(event, context): logger.info("API Gateway event received - processing starts") diff --git a/lambdas/services/mock_data/pds_patient.json b/lambdas/services/mock_data/pds_patient.json index 323e418df..296f1c9fc 100644 --- a/lambdas/services/mock_data/pds_patient.json +++ b/lambdas/services/mock_data/pds_patient.json @@ -80,7 +80,7 @@ "type": "Organization", "identifier": { "system": "https://fhir.nhs.uk/Id/ods-organization-code", - "value": "H81109", + "value": "Y12345", "period": { "start": "2020-01-01" } diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index f950dc9ba..3fd635eff 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -1,5 +1,6 @@ import datetime import logging +import os import re from typing import Optional @@ -153,5 +154,8 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): def get_user_ods_code(): - ssm_service = SSMService() - return ssm_service.get_ssm_parameter(SSMParameter.GP_ODS_CODE.value) + if os.getenv("PDS_FHIR_IS_STUBBED"): + return "Y12345" + else: + ssm_service = SSMService() + return ssm_service.get_ssm_parameter(SSMParameter.GP_ODS_CODE.value) From 14a625d0a0c3823e5e1f94c04afae2043e609033 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 1 Nov 2023 13:06:01 +0000 Subject: [PATCH 42/46] [PRMDR-372] patch get_user_ods_code --- lambdas/utils/lloyd_george_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 3fd635eff..401f04dfe 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -154,7 +154,7 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): def get_user_ods_code(): - if os.getenv("PDS_FHIR_IS_STUBBED"): + if os.getenv("PDS_FHIR_IS_STUBBED") in ['True', 'true']: return "Y12345" else: ssm_service = SSMService() From 867ef53a347bccdea5a01254f074ff73072dd32e Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Thu, 2 Nov 2023 12:17:23 +0000 Subject: [PATCH 43/46] [PRMDR-372] Replace generic Exception catch with specific error types --- lambdas/handlers/bulk_upload_handler.py | 4 +++- lambdas/services/bulk_upload_service.py | 2 +- lambdas/utils/lloyd_george_validator.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lambdas/handlers/bulk_upload_handler.py b/lambdas/handlers/bulk_upload_handler.py index ce19c526a..40dd7916b 100644 --- a/lambdas/handlers/bulk_upload_handler.py +++ b/lambdas/handlers/bulk_upload_handler.py @@ -1,6 +1,8 @@ import logging +from botocore.exceptions import ClientError from services.bulk_upload_service import BulkUploadService +from utils.exceptions import InvalidMessageException logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -18,7 +20,7 @@ def lambda_handler(event, _context): try: logger.info(f"Processing message {index} of {len(event['Records'])}") bulk_upload_service.handle_sqs_message(message) - except Exception as error: + except (InvalidMessageException, ClientError) as error: logger.info(f"Fail to process current message due to error: {error}") logger.info("Continue on next message") logger.info(f"Finished processing all {len(event['Records'])} messages") diff --git a/lambdas/services/bulk_upload_service.py b/lambdas/services/bulk_upload_service.py index 3a559a7d4..9cf9e69a1 100644 --- a/lambdas/services/bulk_upload_service.py +++ b/lambdas/services/bulk_upload_service.py @@ -113,7 +113,7 @@ def handle_sqs_message(self, message: dict): logger.info( f"Successfully uploaded the Lloyd George records for patient: {staging_metadata.nhs_number}" ) - except Exception as e: + except ClientError as e: logger.info(f"Got unexpected error during file transfer: {str(e)}") logger.info("Will try to rollback any change to database and bucket") self.rollback_transaction() diff --git a/lambdas/utils/lloyd_george_validator.py b/lambdas/utils/lloyd_george_validator.py index 401f04dfe..fa0250596 100644 --- a/lambdas/utils/lloyd_george_validator.py +++ b/lambdas/utils/lloyd_george_validator.py @@ -154,7 +154,7 @@ def validate_with_pds_service(file_name_list: list[str], nhs_number: str): def get_user_ods_code(): - if os.getenv("PDS_FHIR_IS_STUBBED") in ['True', 'true']: + if os.getenv("PDS_FHIR_IS_STUBBED") in ["True", "true"]: return "Y12345" else: ssm_service = SSMService() From beb1a43e161a4e28cf4b8cf79b259c9f961331f2 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Thu, 2 Nov 2023 12:29:17 +0000 Subject: [PATCH 44/46] [PRMDR-372] Amend one test, update the catch error types at lambda_handler --- lambdas/handlers/bulk_upload_handler.py | 10 +++++++++- .../tests/unit/handlers/test_bulk_upload_handler.py | 8 ++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lambdas/handlers/bulk_upload_handler.py b/lambdas/handlers/bulk_upload_handler.py index 40dd7916b..d4ddf7293 100644 --- a/lambdas/handlers/bulk_upload_handler.py +++ b/lambdas/handlers/bulk_upload_handler.py @@ -3,6 +3,7 @@ from botocore.exceptions import ClientError from services.bulk_upload_service import BulkUploadService from utils.exceptions import InvalidMessageException +from utils.lloyd_george_validator import LGInvalidFilesException logger = logging.getLogger() logger.setLevel(logging.INFO) @@ -20,7 +21,14 @@ def lambda_handler(event, _context): try: logger.info(f"Processing message {index} of {len(event['Records'])}") bulk_upload_service.handle_sqs_message(message) - except (InvalidMessageException, ClientError) as error: + except ( + ClientError, + InvalidMessageException, + LGInvalidFilesException, + KeyError, + TypeError, + AttributeError, + ) as error: logger.info(f"Fail to process current message due to error: {error}") logger.info("Continue on next message") logger.info(f"Finished processing all {len(event['Records'])} messages") diff --git a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py index 8ec29fab4..2f82057ad 100644 --- a/lambdas/tests/unit/handlers/test_bulk_upload_handler.py +++ b/lambdas/tests/unit/handlers/test_bulk_upload_handler.py @@ -2,7 +2,7 @@ from handlers.bulk_upload_handler import lambda_handler from tests.unit.helpers.data.bulk_upload.test_data import \ TEST_EVENT_WITH_SQS_MESSAGES -from utils.lloyd_george_validator import LGInvalidFilesException +from utils.exceptions import InvalidMessageException @pytest.fixture @@ -22,13 +22,13 @@ def test_lambda_process_each_sqs_message_one_by_one(set_env, mocked_service): mocked_service.handle_sqs_message.assert_any_call(message) -def test_lambda_continue_process_next_message_after_handled_validation_error( +def test_lambda_continue_process_next_message_after_handled_error( set_env, mocked_service ): - # emulate that validation error happen at 2nd message + # emulate that unexpected error happen at 2nd message mocked_service.handle_sqs_message.side_effect = [ None, - LGInvalidFilesException, + InvalidMessageException, None, ] From 8b4c8e5ac8dff5df40a27d8071d86ea5a0b22d42 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Fri, 3 Nov 2023 12:02:58 +0000 Subject: [PATCH 45/46] [PRMDR-372] Replace custom field name transform function with inflection.camelize --- lambdas/models/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lambdas/models/config.py b/lambdas/models/config.py index 6fa41845b..d6ebbd42d 100644 --- a/lambdas/models/config.py +++ b/lambdas/models/config.py @@ -10,4 +10,4 @@ def to_camel(string: str) -> str: def to_capwords(snake_case_string: str) -> str: - return "".join(x.capitalize() for x in snake_case_string.split("_")) + return inflection.camelize(snake_case_string, uppercase_first_letter=True) From 40b3b18cae82ec43e79f0f3620fa037d0e0a3f4e Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Fri, 3 Nov 2023 12:48:35 +0000 Subject: [PATCH 46/46] [PRMDR-372] Change function name for clarity: to_capwords --> to_capitalized_camel --- lambdas/models/bulk_upload_status.py | 6 ++++-- lambdas/models/config.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py index 6be107990..03154f7b8 100644 --- a/lambdas/models/bulk_upload_status.py +++ b/lambdas/models/bulk_upload_status.py @@ -1,13 +1,15 @@ from datetime import datetime from typing import Literal -from models.config import to_capwords +from models.config import to_capitalized_camel from pydantic import BaseModel, ConfigDict, Field from utils.utilities import create_reference_id class UploadStatusBaseClass(BaseModel): - model_config = ConfigDict(alias_generator=to_capwords, populate_by_name=True) + model_config = ConfigDict( + alias_generator=to_capitalized_camel, populate_by_name=True + ) id: str = Field(alias="ID", default_factory=create_reference_id) nhs_number: str timestamp: int = Field(default_factory=lambda: int(datetime.now().timestamp())) diff --git a/lambdas/models/config.py b/lambdas/models/config.py index d6ebbd42d..5d18c68b9 100644 --- a/lambdas/models/config.py +++ b/lambdas/models/config.py @@ -9,5 +9,5 @@ def to_camel(string: str) -> str: conf = ConfigDict(alias_generator=to_camel) -def to_capwords(snake_case_string: str) -> str: +def to_capitalized_camel(snake_case_string: str) -> str: return inflection.camelize(snake_case_string, uppercase_first_letter=True)