Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PRMDR-881 - Remove personal information from file names of stitched LG record #366

Merged
merged 2 commits into from
May 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 7 additions & 19 deletions lambdas/services/lloyd_george_stitch_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from services.document_service import DocumentService
from services.pdf_stitch_service import stitch_pdf
from utils.audit_logging_setup import LoggingService
from utils.dynamo_utils import filter_expression_for_available_docs
from utils.dynamo_utils import filter_uploaded_docs_and_recently_uploading_docs
from utils.exceptions import FileUploadInProgress
from utils.filename_utils import extract_page_number
from utils.lambda_exceptions import LGStitchServiceException
Expand Down Expand Up @@ -55,23 +55,21 @@ def stitch_lloyd_george_record(self, nhs_number: str) -> str:
)

try:
filename_for_stitched_file = self.make_filename_for_stitched_file(
lg_records
)
stitched_lg_record = stitch_pdf(all_lg_parts, self.temp_folder)
filename_for_stitched_file = os.path.basename(stitched_lg_record)
SRAlexander marked this conversation as resolved.
Show resolved Hide resolved
number_of_files = len(all_lg_parts)
last_updated = self.get_most_recent_created_date(lg_records)
total_file_size = self.get_total_file_size(all_lg_parts)
total_file_size_in_byte = self.get_total_file_size_in_bytes(all_lg_parts)

presign_url = self.upload_stitched_lg_record_and_retrieve_presign_url(
stitched_lg_record=stitched_lg_record,
filename_on_bucket=f"{nhs_number}/{filename_for_stitched_file}",
filename_on_bucket=f"combined_files/{filename_for_stitched_file}",
)
response = {
"number_of_files": number_of_files,
"last_updated": last_updated,
"presign_url": presign_url,
"total_file_size_in_byte": total_file_size,
"total_file_size_in_byte": total_file_size_in_byte,
}
logger.audit_splunk_info(
"User has viewed Lloyd George records",
Expand All @@ -92,7 +90,7 @@ def get_lloyd_george_record_for_patient(
self, nhs_number: str
) -> list[DocumentReference]:
try:
filter_expression = filter_expression_for_available_docs()
filter_expression = filter_uploaded_docs_and_recently_uploading_docs()
available_docs = (
self.document_service.fetch_available_document_references_by_type(
nhs_number,
Expand Down Expand Up @@ -152,7 +150,6 @@ def sort_documents_by_filenames(
try:
return sorted(documents, key=lambda doc: extract_page_number(doc.file_name))
except (KeyError, ValueError) as e:
documents[0].nhs_number
logger.error(
f"{LambdaError.StitchValidation.to_str()}: {str(e)}",
{"Result": "Lloyd George stitching failed"},
Expand All @@ -176,14 +173,6 @@ def download_lloyd_george_files(

return all_lg_parts

@staticmethod
def make_filename_for_stitched_file(documents: list[DocumentReference]) -> str:
sample_doc = documents[0]
base_filename = sample_doc.file_name
end_of_total_page_numbers = base_filename.index("_")

return "Combined" + base_filename[end_of_total_page_numbers:]

def upload_stitched_lg_record_and_retrieve_presign_url(
self,
stitched_lg_record: str,
Expand All @@ -210,6 +199,5 @@ def get_most_recent_created_date(documents: list[DocumentReference]) -> str:
return max(doc.created for doc in documents)

@staticmethod
def get_total_file_size(filepaths: list[str]) -> int:
# Return the sum of a list of files (unit: byte)
def get_total_file_size_in_bytes(filepaths: list[str]) -> int:
return sum(os.path.getsize(filepath) for filepath in filepaths)
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,6 @@ def joe_bloggs_event():
return api_gateway_proxy_event


@pytest.fixture
def mock_get_total_file_size(mocker):
yield mocker.patch.object(
LloydGeorgeStitchService,
"get_total_file_size",
return_value=MOCK_TOTAL_FILE_SIZE,
)


def test_lambda_handler_respond_with_200_and_presign_url(
valid_id_event_without_auth_header, context, set_env, mock_stitch_service
):
Expand Down
35 changes: 13 additions & 22 deletions lambdas/tests/unit/services/test_lloyd_george_stitch_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from services.document_service import DocumentService
from services.lloyd_george_stitch_service import LloydGeorgeStitchService
from tests.unit.conftest import MOCK_LG_BUCKET, TEST_NHS_NUMBER, TEST_OBJECT_KEY
from utils.dynamo_utils import filter_expression_for_available_docs
from utils.dynamo_utils import filter_uploaded_docs_and_recently_uploading_docs
from utils.lambda_exceptions import LGStitchServiceException


Expand Down Expand Up @@ -54,12 +54,10 @@ def build_lg_doc_ref(
f"{MOCK_TEMP_FOLDER}/mock_downloaded_file{i}" for i in range(1, 3 + 1)
]
MOCK_STITCHED_FILE = "filename_of_stitched_lg_in_local_storage.pdf"
MOCK_STITCHED_FILE_ON_S3 = (
f"Combined_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[30-12-2019].pdf"
)
MOCK_STITCHED_FILE_ON_S3 = f"combined_files/{MOCK_STITCHED_FILE}"
MOCK_TOTAL_FILE_SIZE = 1024 * 256
MOCK_PRESIGNED_URL = (
f"https://{MOCK_LG_BUCKET}.s3.amazonaws.com/{TEST_NHS_NUMBER}/abcd-1234-5678"
f"https://{MOCK_LG_BUCKET}.s3.amazonaws.com/{MOCK_STITCHED_FILE_ON_S3}"
)


Expand Down Expand Up @@ -135,18 +133,18 @@ def mock_stitch_pdf(mocker):


@pytest.fixture
def mock_get_total_file_size(mocker):
def mock_get_total_file_size_in_bytes(mocker):
yield mocker.patch.object(
LloydGeorgeStitchService,
"get_total_file_size",
"get_total_file_size_in_bytes",
return_value=MOCK_TOTAL_FILE_SIZE,
)


def test_stitch_lloyd_george_record_happy_path(
mock_tempfile,
mock_stitch_pdf,
mock_get_total_file_size,
mock_get_total_file_size_in_bytes,
patched_stitch_service,
):
expected = json.dumps(
Expand All @@ -173,7 +171,7 @@ def test_stitch_lloyd_george_record_happy_path(

patched_stitch_service.upload_stitched_lg_record_and_retrieve_presign_url.assert_called_with(
stitched_lg_record=MOCK_STITCHED_FILE,
filename_on_bucket=f"{TEST_NHS_NUMBER}/{MOCK_STITCHED_FILE_ON_S3}",
filename_on_bucket=MOCK_STITCHED_FILE_ON_S3,
)


Expand Down Expand Up @@ -292,7 +290,7 @@ def test_stitch_lloyd_george_record_raise_500_error_if_failed_to_upload_stitched
def test_get_lloyd_george_record_for_patient(
stitch_service, mock_fetch_doc_ref_by_type
):
mock_filters = filter_expression_for_available_docs()
mock_filters = filter_uploaded_docs_and_recently_uploading_docs()

expected = MOCK_LLOYD_GEORGE_DOCUMENT_REFS
actual = stitch_service.get_lloyd_george_record_for_patient(TEST_NHS_NUMBER)
Expand Down Expand Up @@ -351,15 +349,6 @@ def test_download_lloyd_george_files_raise_error_when_failed_to_download(
stitch_service.download_lloyd_george_files(MOCK_LLOYD_GEORGE_DOCUMENT_REFS)


def test_make_filename_for_stitched_file(stitch_service):
expected = f"Combined_Lloyd_George_Record_[Joe Bloggs]_[{TEST_NHS_NUMBER}]_[30-12-2019].pdf"
actual = stitch_service.make_filename_for_stitched_file(
MOCK_LLOYD_GEORGE_DOCUMENT_REFS
)

assert actual == expected


def test_get_most_recent_created_date(stitch_service):
lg_record = build_lg_doc_ref_list(page_numbers=[1, 2, 3])
lg_record[2].created = "2024-12-14T16:46:07.678657Z"
Expand All @@ -374,7 +363,9 @@ def test_get_total_file_size(mocker, stitch_service):
mocker.patch("os.path.getsize", side_effect=[19000, 20000, 21000])

expected = 60000
actual = stitch_service.get_total_file_size(["file1.pdf", "file2.pdf", "file3.pdf"])
actual = stitch_service.get_total_file_size_in_bytes(
["file1.pdf", "file2.pdf", "file3.pdf"]
)

assert actual == expected

Expand All @@ -383,13 +374,13 @@ def test_upload_stitched_lg_record_and_retrieve_presign_url(mock_s3, stitch_serv
expected = MOCK_PRESIGNED_URL
actual = stitch_service.upload_stitched_lg_record_and_retrieve_presign_url(
stitched_lg_record=MOCK_STITCHED_FILE,
filename_on_bucket=f"{TEST_NHS_NUMBER}/{MOCK_STITCHED_FILE_ON_S3}",
filename_on_bucket=MOCK_STITCHED_FILE_ON_S3,
)

assert actual == expected

mock_s3.upload_file_with_extra_args.assert_called_with(
file_key=f"{TEST_NHS_NUMBER}/{MOCK_STITCHED_FILE_ON_S3}",
file_key=MOCK_STITCHED_FILE_ON_S3,
file_name=MOCK_STITCHED_FILE,
s3_bucket_name=MOCK_LG_BUCKET,
extra_args={
Expand Down
2 changes: 1 addition & 1 deletion lambdas/utils/dynamo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def create_expression_attribute_placeholder(value: str) -> str:
return f"#{inflection.camelize(value, uppercase_first_letter=True)}_attr"


def filter_expression_for_available_docs():
def filter_uploaded_docs_and_recently_uploading_docs():
filter_builder = DynamoQueryFilterBuilder()
time_limit = int(datetime.now().timestamp() - (60 * 3))

Expand Down
Loading