diff --git a/lambdas/handlers/create_document_reference_handler.py b/lambdas/handlers/create_document_reference_handler.py index 2cb032d8e..815e7ff35 100644 --- a/lambdas/handlers/create_document_reference_handler.py +++ b/lambdas/handlers/create_document_reference_handler.py @@ -16,6 +16,8 @@ from utils.lambda_response import ApiGatewayResponse from utils.utilities import validate_id +from services.lloyd_george_validator import validate_lg_files, LGInvalidFilesException + sys.path.append(os.path.join(os.path.dirname(__file__))) logger = logging.getLogger() @@ -119,6 +121,7 @@ def lambda_handler(event, context): return response try: + s3_response = s3_service.create_document_presigned_url_handler( document_reference.s3_bucket_name, document_reference.nhs_number + "/" + document_reference.id, @@ -135,6 +138,7 @@ def lambda_handler(event, context): return response try: + validate_lg_files(lg_documents) dynamo_service = DynamoDBService() if arf_documents: logger.info("Writing ARF document references") @@ -152,6 +156,12 @@ def lambda_handler(event, context): 500, "An error occurred when creating document reference", "POST" ).create_api_gateway_response() return response + except LGInvalidFilesException as e: + logger.error(e) + response = ApiGatewayResponse( + 400, "One or more of the files is not valid", "POST" + ).create_api_gateway_response() + return response return ApiGatewayResponse( 200, json.dumps(url_responses), "POST" diff --git a/lambdas/models/nhs_document_reference.py b/lambdas/models/nhs_document_reference.py index 9153be2df..e2be32283 100644 --- a/lambdas/models/nhs_document_reference.py +++ b/lambdas/models/nhs_document_reference.py @@ -2,9 +2,8 @@ from typing import Any from enums.metadata_field_names import DocumentReferenceMetadataFields -from pydantic import BaseModel, model_validator +from pydantic import BaseModel -from services.lloyd_george_validator import validate_lg_file_type class UploadRequestDocument(BaseModel): @@ -12,19 +11,6 @@ class UploadRequestDocument(BaseModel): contentType: str docType: str - @model_validator(mode='before') - @classmethod - def check_file_type_for_lg(cls, data: Any) -> Any: - if isinstance(data, dict): - doc_type = data.get('docType') - content_type = data.get('contentType') - elif isinstance(data, UploadRequestDocument): - doc_type = data.docType - content_type = data.contentType - if doc_type == 'LG': - validate_lg_file_type(content_type) - return data - class NHSDocumentReference: def __init__( diff --git a/lambdas/services/lloyd_george_validator.py b/lambdas/services/lloyd_george_validator.py index 6d3c43801..00d9bb67a 100644 --- a/lambdas/services/lloyd_george_validator.py +++ b/lambdas/services/lloyd_george_validator.py @@ -1,7 +1,34 @@ -class LGFileTypeException(ValueError): - """One or more of the files do not match the required file type.""" +import re + +from lambdas.models.nhs_document_reference import NHSDocumentReference + +class LGInvalidFilesException(Exception): pass -def validate_lg_file_type(file_type): +def validate_lg_file_type(file_type: str): if file_type != 'application/pdf': - raise LGFileTypeException + raise LGInvalidFilesException('One or more of the files do not match the required file type') + +def validate_file_name(name: str): + lg_regex = r'[0-9]+of[0-9]+_Lloyd_George_Record_\[[A-Za-z À-ÿ\-]+]_\[[0-9]{10}]_\[\d\d-\d\d-\d\d\d\d].pdf' + if not re.fullmatch(lg_regex, name): + raise LGInvalidFilesException('One or more of the files do not match naming convention') + +def check_for_duplicate_files(file_list: list[str]): + if len(file_list) > len(set(file_list)): + raise LGInvalidFilesException('One or more of the files has the same filename') + +def check_for_number_of_files_match_expected(file_name: str, total_files_number: int): + lg_number_regex = 'of[0-9]+' + expected_number_of_files = re.search(lg_number_regex, file_name) + if expected_number_of_files and not expected_number_of_files.group()[2:] == str(total_files_number): + raise LGInvalidFilesException('There are missing file(s) in the request') + +def validate_lg_files(file_list: list[NHSDocumentReference]): + files_name_list = [] + for doc in file_list: + check_for_number_of_files_match_expected(doc.file_name, len(file_list)) + validate_lg_file_type(doc.content_type) + validate_file_name(doc.file_name) + files_name_list.append(doc.file_name) + check_for_duplicate_files(files_name_list) diff --git a/lambdas/tests/unit/handlers/test_create_document_reference_handler.py b/lambdas/tests/unit/handlers/test_create_document_reference_handler.py index 80c2a30bd..ea8110d4d 100644 --- a/lambdas/tests/unit/handlers/test_create_document_reference_handler.py +++ b/lambdas/tests/unit/handlers/test_create_document_reference_handler.py @@ -10,7 +10,8 @@ TEST_NHS_NUMBER, TEST_OBJECT_KEY) from tests.unit.helpers.data.create_document_reference import ( ARF_MOCK_EVENT_BODY, ARF_MOCK_RESPONSE, LG_AND_ARF_MOCK_RESPONSE, - LG_MOCK_EVENT_BODY, LG_MOCK_RESPONSE, MOCK_EVENT_BODY, LG_MOCK_BAD_EVENT_BODY) + LG_MOCK_EVENT_BODY, LG_MOCK_RESPONSE, MOCK_EVENT_BODY, LG_MOCK_BAD_FILE_TYPE_EVENT_BODY, + LG_MOCK_MISSING_FILES_EVENT_BODY, LG_MOCK_BAD_FILE_NAME_EVENT_BODY, LG_MOCK_DUPLICATE_FILES_EVENT_BODY) from tests.unit.services.test_s3_service import MOCK_PRESIGNED_POST_RESPONSE from utils.lambda_response import ApiGatewayResponse @@ -265,14 +266,25 @@ def test_create_document_reference_arf_type_s3_ClientError_returns_500( actual = lambda_handler(arf_type_event, context) assert actual == expected +@pytest.mark.parametrize("event_body", [LG_MOCK_BAD_FILE_TYPE_EVENT_BODY, LG_MOCK_MISSING_FILES_EVENT_BODY, LG_MOCK_BAD_FILE_NAME_EVENT_BODY, LG_MOCK_DUPLICATE_FILES_EVENT_BODY]) +def test_invalid_file_type_for_lg_return_400(set_env, context, mocker, event_body): + mock_supported_document_get_from_field_name = mocker.patch( + "enums.supported_document_types.SupportedDocumentTypes.get_from_field_name" + ) + mock_supported_document_get_from_field_name.return_value = SupportedDocumentTypes.LG + mocker.patch("services.dynamo_service.DynamoDBService.post_item_service") + mock_presigned = mocker.patch( + "services.s3_service.S3Service.create_document_presigned_url_handler" + ) + mock_presigned.return_value = MOCK_PRESIGNED_POST_RESPONSE + -def test_invalid_file_type_for_lg_return_400(set_env, context): expected = ApiGatewayResponse( 400, - "Failed to parse document upload request data", - "GET", + "One or more of the files is not valid", + "POST", ).create_api_gateway_response() - actual = lambda_handler({"body": json.dumps(LG_MOCK_BAD_EVENT_BODY)}, context) + actual = lambda_handler({"body": json.dumps(event_body)}, context) assert actual == expected def test_create_document_reference_unknown_document_type_returns_400( diff --git a/lambdas/tests/unit/helpers/data/create_document_reference.py b/lambdas/tests/unit/helpers/data/create_document_reference.py index 4cba8720d..a713cce6d 100644 --- a/lambdas/tests/unit/helpers/data/create_document_reference.py +++ b/lambdas/tests/unit/helpers/data/create_document_reference.py @@ -69,33 +69,80 @@ "created": "2023-10-02T15:55:30.650Z", } -LG_MOCK_BAD_EVENT_BODY = { +LG_MOCK_BAD_FILE_TYPE_EVENT_BODY = { "resourceType": "DocumentReference", "subject": {"identifier": {"value": TEST_NHS_NUMBER}}, "content": [ { "attachment": [ { - "fileName": f"1of3_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", + "fileName": f"1of1_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", "contentType": "text/plain", "docType": "LG", - }, + } + ] + } + ], + "created": "2023-10-02T15:55:30.650Z", +} + +LG_MOCK_BAD_FILE_NAME_EVENT_BODY = { + "resourceType": "DocumentReference", + "subject": {"identifier": {"value": TEST_NHS_NUMBER}}, + "content": [ + { + "attachment": [ { - "fileName": f"2of3_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", + "fileName": f"1of1_BAD_NAME_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", "contentType": "application/pdf", "docType": "LG", - }, + } + ] + } + ], + "created": "2023-10-02T15:55:30.650Z", +} + +LG_MOCK_MISSING_FILES_EVENT_BODY = { + "resourceType": "DocumentReference", + "subject": {"identifier": {"value": TEST_NHS_NUMBER}}, + "content": [ + { + "attachment": [ { - "fileName": f"3of3_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", + "fileName": f"1of3_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", + "contentType": "application/pdf", + "docType": "LG", + } + ] + } + ], + "created": "2023-10-02T15:55:30.650Z", +} + +LG_MOCK_DUPLICATE_FILES_EVENT_BODY = { + "resourceType": "DocumentReference", + "subject": {"identifier": {"value": TEST_NHS_NUMBER}}, + "content": [ + { + "attachment": [ + { + "fileName": f"1of2_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", "contentType": "application/pdf", "docType": "LG", }, + { + "fileName": f"1of2_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[25-12-2019].pdf", + "contentType": "application/pdf", + "docType": "LG", + } ] } ], "created": "2023-10-02T15:55:30.650Z", } + ARF_MOCK_EVENT_BODY = { "resourceType": "DocumentReference", "subject": {"identifier": {"value": TEST_NHS_NUMBER}}, diff --git a/lambdas/tests/unit/services/test_lloyd_george_validator.py b/lambdas/tests/unit/services/test_lloyd_george_validator.py index 1e0dfdf66..9a3415c25 100644 --- a/lambdas/tests/unit/services/test_lloyd_george_validator.py +++ b/lambdas/tests/unit/services/test_lloyd_george_validator.py @@ -1,9 +1,10 @@ import pytest -from services.lloyd_george_validator import validate_lg_file_type, LGFileTypeException +from services.lloyd_george_validator import validate_lg_file_type, LGInvalidFilesException, validate_file_name, check_for_duplicate_files, check_for_number_of_files_match_expected + def test_catching_error_when_file_type_not_pdf(): - with pytest.raises(LGFileTypeException): + with pytest.raises(LGInvalidFilesException): file_type = 'image/png' validate_lg_file_type(file_type) @@ -11,5 +12,60 @@ def test_valid_file_type(): try: file_type = 'application/pdf' validate_lg_file_type(file_type) - except LGFileTypeException: - assert False, 'One or more of the files do not match the required file type.' + except LGInvalidFilesException: + assert False, 'One or more of the files do not match the required file type' + +def test_invalid_file_name(): + with pytest.raises(LGInvalidFilesException): + file_name = 'bad_name' + validate_file_name(file_name) + +def test_valid_file_name(): + try: + file_name = '1of1_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf' + validate_file_name(file_name) + except LGInvalidFilesException: + assert False, 'One or more of the files do not match naming convention' + +def test_valid_file_name_special_characters(): + try: + file_name = '1of1_Lloyd_George_Record_[Joé Blöggês-Glüë]_[1111111111]_[25-12-2019].pdf' + validate_file_name(file_name) + except LGInvalidFilesException: + assert False, 'One or more of the files do not match naming convention' + +def test_files_with_duplication(): + with pytest.raises(LGInvalidFilesException): + lg_file_list = [ + '1of1_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf', + '1of1_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf' + ] + check_for_duplicate_files(lg_file_list) + +def test_files_without_duplication(): + try: + lg_file_list = [ + '1of2_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf', + '2of2_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf' + ] + check_for_duplicate_files(lg_file_list) + except LGInvalidFilesException: + assert False, 'One or more of the files has the same filename' + +def test_files_list_with_missing_files(): + with pytest.raises(LGInvalidFilesException): + lg_file_list = [ + '1of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf', + '2of3_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf' + ] + check_for_number_of_files_match_expected(lg_file_list[0], str(len(lg_file_list))) + +def test_files_without_missing_files(): + try: + lg_file_list = [ + '1of2_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf', + '2of2_Lloyd_George_Record_[Joe Bloggs]_[1111111111]_[25-12-2019].pdf' + ] + check_for_number_of_files_match_expected(lg_file_list[0], str(len(lg_file_list))) + except LGInvalidFilesException: + assert False, 'There are missing file(s) in the request'