From 3bc2b063bde48c43707f38974450b4f6ec873233 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Wed, 15 May 2024 12:00:50 +0100 Subject: [PATCH 1/2] [PRMDR-881] Changed file key for stitched lloyd george files to not contain any personal info --- .../services/lloyd_george_stitch_service.py | 14 ++----------- .../test_lloyd_george_stitch_service.py | 21 +++++-------------- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/lambdas/services/lloyd_george_stitch_service.py b/lambdas/services/lloyd_george_stitch_service.py index 6c5350a7d..53366fa70 100644 --- a/lambdas/services/lloyd_george_stitch_service.py +++ b/lambdas/services/lloyd_george_stitch_service.py @@ -55,17 +55,15 @@ def stitch_lloyd_george_record(self, nhs_number: str) -> str: ) try: - filename_for_stitched_file = self.make_filename_for_stitched_file( - lg_records - ) stitched_lg_record = stitch_pdf(all_lg_parts, self.temp_folder) + filename_for_stitched_file = os.path.basename(stitched_lg_record) number_of_files = len(all_lg_parts) last_updated = self.get_most_recent_created_date(lg_records) total_file_size = self.get_total_file_size(all_lg_parts) presign_url = self.upload_stitched_lg_record_and_retrieve_presign_url( stitched_lg_record=stitched_lg_record, - filename_on_bucket=f"{nhs_number}/{filename_for_stitched_file}", + filename_on_bucket=f"combined_files/{filename_for_stitched_file}", ) response = { "number_of_files": number_of_files, @@ -176,14 +174,6 @@ def download_lloyd_george_files( return all_lg_parts - @staticmethod - def make_filename_for_stitched_file(documents: list[DocumentReference]) -> str: - sample_doc = documents[0] - base_filename = sample_doc.file_name - end_of_total_page_numbers = base_filename.index("_") - - return "Combined" + base_filename[end_of_total_page_numbers:] - def upload_stitched_lg_record_and_retrieve_presign_url( self, stitched_lg_record: str, diff --git a/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py b/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py index 7df37d83f..117ee5cda 100644 --- a/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py +++ b/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py @@ -54,12 +54,10 @@ def build_lg_doc_ref( f"{MOCK_TEMP_FOLDER}/mock_downloaded_file{i}" for i in range(1, 3 + 1) ] MOCK_STITCHED_FILE = "filename_of_stitched_lg_in_local_storage.pdf" -MOCK_STITCHED_FILE_ON_S3 = ( - f"Combined_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[30-12-2019].pdf" -) +MOCK_STITCHED_FILE_ON_S3 = f"combined_files/{MOCK_STITCHED_FILE}" MOCK_TOTAL_FILE_SIZE = 1024 * 256 MOCK_PRESIGNED_URL = ( - f"https://{MOCK_LG_BUCKET}.s3.amazonaws.com/{TEST_NHS_NUMBER}/abcd-1234-5678" + f"https://{MOCK_LG_BUCKET}.s3.amazonaws.com/{MOCK_STITCHED_FILE_ON_S3}" ) @@ -173,7 +171,7 @@ def test_stitch_lloyd_george_record_happy_path( patched_stitch_service.upload_stitched_lg_record_and_retrieve_presign_url.assert_called_with( stitched_lg_record=MOCK_STITCHED_FILE, - filename_on_bucket=f"{TEST_NHS_NUMBER}/{MOCK_STITCHED_FILE_ON_S3}", + filename_on_bucket=MOCK_STITCHED_FILE_ON_S3, ) @@ -351,15 +349,6 @@ def test_download_lloyd_george_files_raise_error_when_failed_to_download( stitch_service.download_lloyd_george_files(MOCK_LLOYD_GEORGE_DOCUMENT_REFS) -def test_make_filename_for_stitched_file(stitch_service): - expected = f"Combined_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[30-12-2019].pdf" - actual = stitch_service.make_filename_for_stitched_file( - MOCK_LLOYD_GEORGE_DOCUMENT_REFS - ) - - assert actual == expected - - def test_get_most_recent_created_date(stitch_service): lg_record = build_lg_doc_ref_list(page_numbers=[1, 2, 3]) lg_record[2].created = "2024-12-14T16:46:07.678657Z" @@ -383,13 +372,13 @@ def test_upload_stitched_lg_record_and_retrieve_presign_url(mock_s3, stitch_serv expected = MOCK_PRESIGNED_URL actual = stitch_service.upload_stitched_lg_record_and_retrieve_presign_url( stitched_lg_record=MOCK_STITCHED_FILE, - filename_on_bucket=f"{TEST_NHS_NUMBER}/{MOCK_STITCHED_FILE_ON_S3}", + filename_on_bucket=MOCK_STITCHED_FILE_ON_S3, ) assert actual == expected mock_s3.upload_file_with_extra_args.assert_called_with( - file_key=f"{TEST_NHS_NUMBER}/{MOCK_STITCHED_FILE_ON_S3}", + file_key=MOCK_STITCHED_FILE_ON_S3, file_name=MOCK_STITCHED_FILE, s3_bucket_name=MOCK_LG_BUCKET, extra_args={ From 2b4862bfbb8814fe9abb540e5c402510f5938786 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Thu, 16 May 2024 11:47:24 +0100 Subject: [PATCH 2/2] [PRMDR-881] minor refactoring --- lambdas/services/lloyd_george_stitch_service.py | 12 +++++------- .../test_lloyd_george_record_stitch_handler.py | 9 --------- .../services/test_lloyd_george_stitch_service.py | 14 ++++++++------ lambdas/utils/dynamo_utils.py | 2 +- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/lambdas/services/lloyd_george_stitch_service.py b/lambdas/services/lloyd_george_stitch_service.py index 53366fa70..6c9aff311 100644 --- a/lambdas/services/lloyd_george_stitch_service.py +++ b/lambdas/services/lloyd_george_stitch_service.py @@ -14,7 +14,7 @@ from services.document_service import DocumentService from services.pdf_stitch_service import stitch_pdf from utils.audit_logging_setup import LoggingService -from utils.dynamo_utils import filter_expression_for_available_docs +from utils.dynamo_utils import filter_uploaded_docs_and_recently_uploading_docs from utils.exceptions import FileUploadInProgress from utils.filename_utils import extract_page_number from utils.lambda_exceptions import LGStitchServiceException @@ -59,7 +59,7 @@ def stitch_lloyd_george_record(self, nhs_number: str) -> str: filename_for_stitched_file = os.path.basename(stitched_lg_record) number_of_files = len(all_lg_parts) last_updated = self.get_most_recent_created_date(lg_records) - total_file_size = self.get_total_file_size(all_lg_parts) + total_file_size_in_byte = self.get_total_file_size_in_bytes(all_lg_parts) presign_url = self.upload_stitched_lg_record_and_retrieve_presign_url( stitched_lg_record=stitched_lg_record, @@ -69,7 +69,7 @@ def stitch_lloyd_george_record(self, nhs_number: str) -> str: "number_of_files": number_of_files, "last_updated": last_updated, "presign_url": presign_url, - "total_file_size_in_byte": total_file_size, + "total_file_size_in_byte": total_file_size_in_byte, } logger.audit_splunk_info( "User has viewed Lloyd George records", @@ -90,7 +90,7 @@ def get_lloyd_george_record_for_patient( self, nhs_number: str ) -> list[DocumentReference]: try: - filter_expression = filter_expression_for_available_docs() + filter_expression = filter_uploaded_docs_and_recently_uploading_docs() available_docs = ( self.document_service.fetch_available_document_references_by_type( nhs_number, @@ -150,7 +150,6 @@ def sort_documents_by_filenames( try: return sorted(documents, key=lambda doc: extract_page_number(doc.file_name)) except (KeyError, ValueError) as e: - documents[0].nhs_number logger.error( f"{LambdaError.StitchValidation.to_str()}: {str(e)}", {"Result": "Lloyd George stitching failed"}, @@ -200,6 +199,5 @@ def get_most_recent_created_date(documents: list[DocumentReference]) -> str: return max(doc.created for doc in documents) @staticmethod - def get_total_file_size(filepaths: list[str]) -> int: - # Return the sum of a list of files (unit: byte) + def get_total_file_size_in_bytes(filepaths: list[str]) -> int: return sum(os.path.getsize(filepath) for filepath in filepaths) diff --git a/lambdas/tests/unit/handlers/test_lloyd_george_record_stitch_handler.py b/lambdas/tests/unit/handlers/test_lloyd_george_record_stitch_handler.py index f376bd8a4..1fb105643 100755 --- a/lambdas/tests/unit/handlers/test_lloyd_george_record_stitch_handler.py +++ b/lambdas/tests/unit/handlers/test_lloyd_george_record_stitch_handler.py @@ -83,15 +83,6 @@ def joe_bloggs_event(): return api_gateway_proxy_event -@pytest.fixture -def mock_get_total_file_size(mocker): - yield mocker.patch.object( - LloydGeorgeStitchService, - "get_total_file_size", - return_value=MOCK_TOTAL_FILE_SIZE, - ) - - def test_lambda_handler_respond_with_200_and_presign_url( valid_id_event_without_auth_header, context, set_env, mock_stitch_service ): diff --git a/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py b/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py index 117ee5cda..33c3d184f 100644 --- a/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py +++ b/lambdas/tests/unit/services/test_lloyd_george_stitch_service.py @@ -11,7 +11,7 @@ from services.document_service import DocumentService from services.lloyd_george_stitch_service import LloydGeorgeStitchService from tests.unit.conftest import MOCK_LG_BUCKET, TEST_NHS_NUMBER, TEST_OBJECT_KEY -from utils.dynamo_utils import filter_expression_for_available_docs +from utils.dynamo_utils import filter_uploaded_docs_and_recently_uploading_docs from utils.lambda_exceptions import LGStitchServiceException @@ -133,10 +133,10 @@ def mock_stitch_pdf(mocker): @pytest.fixture -def mock_get_total_file_size(mocker): +def mock_get_total_file_size_in_bytes(mocker): yield mocker.patch.object( LloydGeorgeStitchService, - "get_total_file_size", + "get_total_file_size_in_bytes", return_value=MOCK_TOTAL_FILE_SIZE, ) @@ -144,7 +144,7 @@ def mock_get_total_file_size(mocker): def test_stitch_lloyd_george_record_happy_path( mock_tempfile, mock_stitch_pdf, - mock_get_total_file_size, + mock_get_total_file_size_in_bytes, patched_stitch_service, ): expected = json.dumps( @@ -290,7 +290,7 @@ def test_stitch_lloyd_george_record_raise_500_error_if_failed_to_upload_stitched def test_get_lloyd_george_record_for_patient( stitch_service, mock_fetch_doc_ref_by_type ): - mock_filters = filter_expression_for_available_docs() + mock_filters = filter_uploaded_docs_and_recently_uploading_docs() expected = MOCK_LLOYD_GEORGE_DOCUMENT_REFS actual = stitch_service.get_lloyd_george_record_for_patient(TEST_NHS_NUMBER) @@ -363,7 +363,9 @@ def test_get_total_file_size(mocker, stitch_service): mocker.patch("os.path.getsize", side_effect=[19000, 20000, 21000]) expected = 60000 - actual = stitch_service.get_total_file_size(["file1.pdf", "file2.pdf", "file3.pdf"]) + actual = stitch_service.get_total_file_size_in_bytes( + ["file1.pdf", "file2.pdf", "file3.pdf"] + ) assert actual == expected diff --git a/lambdas/utils/dynamo_utils.py b/lambdas/utils/dynamo_utils.py index 934201cf8..9d77ede7f 100644 --- a/lambdas/utils/dynamo_utils.py +++ b/lambdas/utils/dynamo_utils.py @@ -113,7 +113,7 @@ def create_expression_attribute_placeholder(value: str) -> str: return f"#{inflection.camelize(value, uppercase_first_letter=True)}_attr" -def filter_expression_for_available_docs(): +def filter_uploaded_docs_and_recently_uploading_docs(): filter_builder = DynamoQueryFilterBuilder() time_limit = int(datetime.now().timestamp() - (60 * 3))