From acd8d0ccce14940c12c940effa289b909f3375cd Mon Sep 17 00:00:00 2001 From: abbas-khan10 <127417949+abbas-khan10@users.noreply.github.com> Date: Fri, 4 Oct 2024 18:36:04 +0100 Subject: [PATCH] PRMP-756: Add bulk report output models, additional reports and updated S3 filepaths --- lambdas/models/bulk_upload_report.py | 35 ++ lambdas/models/bulk_upload_report_output.py | 168 +++++++ lambdas/models/bulk_upload_status.py | 40 -- .../bulk_upload_dynamo_repository.py | 2 +- .../services/bulk_upload_report_service.py | 389 ++++++++++------ .../services/statistical_report_service.py | 12 +- .../data/bulk_upload/dynamo_responses.py | 261 +++++++++++ .../expected_bulk_upload_summary_report.csv | 18 +- .../bulk_upload/expected_deceased_report.csv | 3 + .../expected_ods_report_for_uploader_1.csv | 6 + .../expected_ods_report_for_uploader_2.csv | 6 + .../bulk_upload/expected_rejected_report.csv | 7 + .../expected_restricted_report.csv | 3 + .../bulk_upload/expected_success_report.csv | 11 + .../bulk_upload/expected_suspended_report.csv | 3 + ...d_status.py => test_bulk_upload_report.py} | 6 +- .../models/test_bulk_upload_report_output.py | 374 +++++++++++++++ .../test_bulk_upload_report_service.py | 429 ++++++++++++------ .../test_statistical_report_service.py | 6 +- lambdas/utils/utilities.py | 6 + 20 files changed, 1463 insertions(+), 322 deletions(-) create mode 100644 lambdas/models/bulk_upload_report.py create mode 100644 lambdas/models/bulk_upload_report_output.py delete mode 100644 lambdas/models/bulk_upload_status.py create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/dynamo_responses.py create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_deceased_report.csv create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_1.csv create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_2.csv create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_rejected_report.csv create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_restricted_report.csv create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_success_report.csv create mode 100644 lambdas/tests/unit/helpers/data/bulk_upload/expected_suspended_report.csv rename lambdas/tests/unit/models/{test_bulk_upload_status.py => test_bulk_upload_report.py} (95%) create mode 100644 lambdas/tests/unit/models/test_bulk_upload_report_output.py diff --git a/lambdas/models/bulk_upload_report.py b/lambdas/models/bulk_upload_report.py new file mode 100644 index 000000000..9c962260a --- /dev/null +++ b/lambdas/models/bulk_upload_report.py @@ -0,0 +1,35 @@ +from datetime import datetime +from typing import Optional + +from enums.metadata_report import MetadataReport +from pydantic import BaseModel, ConfigDict, Field +from pydantic.alias_generators import to_pascal +from utils.audit_logging_setup import LoggingService +from utils.utilities import create_reference_id + +logger = LoggingService(__name__) + + +class BulkUploadReport(BaseModel): + model_config = ConfigDict(alias_generator=to_pascal, populate_by_name=True) + id: str = Field(alias=MetadataReport.ID, default_factory=create_reference_id) + nhs_number: str = Field(alias=MetadataReport.NhsNumber) + upload_status: str = Field(alias=MetadataReport.UploadStatus) + timestamp: int = Field( + alias=MetadataReport.Timestamp, + default_factory=lambda: int(datetime.now().timestamp()), + ) + date: str = Field( + alias=MetadataReport.Date, + default_factory=lambda: date_string_yyyymmdd(datetime.now()), + ) + file_path: str = Field(alias=MetadataReport.FilePath) + pds_ods_code: str = Field(alias=MetadataReport.PdsOdsCode) + uploader_ods_code: str = Field(alias=MetadataReport.UploaderOdsCode) + failure_reason: Optional[str] = Field( + default="", alias=MetadataReport.FailureReason + ) + + +def date_string_yyyymmdd(time_now: datetime) -> str: + return time_now.strftime("%Y-%m-%d") diff --git a/lambdas/models/bulk_upload_report_output.py b/lambdas/models/bulk_upload_report_output.py new file mode 100644 index 000000000..a9fa49dd3 --- /dev/null +++ b/lambdas/models/bulk_upload_report_output.py @@ -0,0 +1,168 @@ +from enums.metadata_report import MetadataReport +from enums.patient_ods_inactive_status import PatientOdsInactiveStatus +from enums.upload_status import UploadStatus +from inflection import underscore +from models.bulk_upload_report import BulkUploadReport +from utils.audit_logging_setup import LoggingService + +logger = LoggingService(__name__) + + +class ReportBase: + def __init__( + self, + generated_at: str, + ): + self.generated_at = generated_at + self.total_successful = set() + self.total_registered_elsewhere = set() + self.total_suspended = set() + self.total_deceased = set() + self.total_restricted = set() + + def get_total_successful_nhs_numbers(self) -> list: + if self.total_successful: + return [patient[0] for patient in self.total_successful] + return [] + + def get_total_successful_count(self) -> int: + return len(self.total_successful) + + def get_total_registered_elsewhere_count(self) -> int: + return len(self.total_registered_elsewhere) + + def get_total_suspended_count(self) -> int: + return len(self.total_suspended) + + def get_total_deceased_count(self) -> int: + return len(self.total_deceased) + + def get_total_restricted_count(self) -> int: + return len(self.total_restricted) + + @staticmethod + def get_sorted(to_sort: set) -> list: + return sorted(to_sort, key=lambda x: x[0]) if to_sort else [] + + +class OdsReport(ReportBase): + def __init__( + self, + generated_at: str, + uploader_ods_code: str = "", + report_items: list[BulkUploadReport] = [], + ): + super().__init__(generated_at) + self.report_items = report_items + self.uploader_ods_code = uploader_ods_code + self.failures_per_patient = {} + self.unique_failures = {} + + self.populate_report() + + def populate_report(self): + logger.info(f"Generating ODS report file for {self.uploader_ods_code}") + + for item in self.report_items: + if item.upload_status == UploadStatus.COMPLETE: + self.process_successful_report_item(item) + elif item.upload_status == UploadStatus.FAILED: + self.process_failed_report_item(item) + + self.set_unique_failures() + + def process_successful_report_item(self, item: BulkUploadReport): + self.total_successful.add((item.nhs_number, item.date)) + + if item.pds_ods_code == PatientOdsInactiveStatus.SUSPENDED: + self.total_suspended.add((item.nhs_number, item.date)) + elif item.pds_ods_code == PatientOdsInactiveStatus.DECEASED: + self.total_deceased.add((item.nhs_number, item.date, item.failure_reason)) + elif item.pds_ods_code == PatientOdsInactiveStatus.RESTRICTED: + self.total_restricted.add((item.nhs_number, item.date)) + elif ( + item.uploader_ods_code != item.pds_ods_code + and item.pds_ods_code not in PatientOdsInactiveStatus.list() + ): + self.total_registered_elsewhere.add((item.nhs_number, item.date)) + + def process_failed_report_item(self, item: BulkUploadReport): + is_new_failure = item.nhs_number not in self.failures_per_patient + + is_timestamp_newer = ( + item.nhs_number in self.failures_per_patient + and self.failures_per_patient[item.nhs_number].get(MetadataReport.Timestamp) + < item.timestamp + ) + + if (item.failure_reason and is_new_failure) or is_timestamp_newer: + self.failures_per_patient.update( + { + item.nhs_number: item.model_dump( + include={ + underscore(str(MetadataReport.Date)), + underscore(str(MetadataReport.Timestamp)), + underscore(str(MetadataReport.UploaderOdsCode)), + underscore(str(MetadataReport.FailureReason)), + }, + by_alias=True, + ) + } + ) + + def set_unique_failures(self): + patients_to_remove = { + patient + for patient in self.failures_per_patient + if patient in self.get_total_successful_nhs_numbers() + } + for patient in patients_to_remove: + self.failures_per_patient.pop(patient) + + for patient_data in self.failures_per_patient.values(): + reason = patient_data.get(MetadataReport.FailureReason) + self.unique_failures[reason] = self.unique_failures.get(reason, 0) + 1 + + def get_unsuccessful_reasons_data_rows(self): + return [ + [MetadataReport.FailureReason, failure_reason, count] + for failure_reason, count in self.unique_failures.items() + ] + + +class SummaryReport(ReportBase): + def __init__(self, generated_at: str, ods_reports: list[OdsReport] = []): + super().__init__(generated_at) + self.ods_reports = ods_reports + self.success_summary = [] + self.reason_summary = [] + + self.populate_report() + + def populate_report(self): + ods_code_success_total = {} + + for report in self.ods_reports: + self.total_successful.update(report.total_successful) + self.total_registered_elsewhere.update(report.total_registered_elsewhere) + self.total_suspended.update(report.total_suspended) + self.total_deceased.update(report.total_deceased) + self.total_restricted.update(report.total_restricted) + ods_code_success_total[report.uploader_ods_code] = report.total_successful + + for reason, count in report.unique_failures.items(): + self.reason_summary.append( + [ + f"{MetadataReport.FailureReason} for {report.uploader_ods_code}", + reason, + count, + ] + ) + + if ods_code_success_total: + for uploader_ods_code, nhs_numbers in ods_code_success_total.items(): + self.success_summary.append( + ["Success by ODS", uploader_ods_code, len(nhs_numbers)] + ) + else: + self.success_summary.append(["Success by ODS", "No ODS codes found", 0]) diff --git a/lambdas/models/bulk_upload_status.py b/lambdas/models/bulk_upload_status.py deleted file mode 100644 index 882181e81..000000000 --- a/lambdas/models/bulk_upload_status.py +++ /dev/null @@ -1,40 +0,0 @@ -from datetime import datetime -from typing import Optional - -from enums.upload_status import UploadStatus -from pydantic import BaseModel, ConfigDict, Field -from pydantic.alias_generators import to_pascal -from utils.utilities import create_reference_id - - -class BulkUploadReport(BaseModel): - model_config = ConfigDict(alias_generator=to_pascal, populate_by_name=True) - id: str = Field(alias="ID", default_factory=create_reference_id) - nhs_number: str - upload_status: UploadStatus - timestamp: int = Field(default_factory=lambda: int(datetime.now().timestamp())) - date: str = Field(default_factory=lambda: date_string_yyyymmdd(datetime.now())) - file_path: str - pds_ods_code: str = "" - uploader_ods_code: str = "" - failure_reason: Optional[str] = None - - -def date_string_yyyymmdd(time_now: datetime) -> str: - return time_now.strftime("%Y-%m-%d") - - -class OdsReport: - def __init__( - self, - uploader_ods_code: str, - total_successful: int = 0, - total_registered_elsewhere: int = 0, - total_suspended: int = 0, - failure_reasons: dict = {}, - ): - self.uploader_ods_code = uploader_ods_code - self.total_successful = total_successful - self.total_registered_elsewhere = total_registered_elsewhere - self.total_suspended = total_suspended - self.failure_reasons = failure_reasons diff --git a/lambdas/repositories/bulk_upload/bulk_upload_dynamo_repository.py b/lambdas/repositories/bulk_upload/bulk_upload_dynamo_repository.py index 2c1e0ab2d..b0e8130fd 100644 --- a/lambdas/repositories/bulk_upload/bulk_upload_dynamo_repository.py +++ b/lambdas/repositories/bulk_upload/bulk_upload_dynamo_repository.py @@ -2,7 +2,7 @@ from enums.metadata_field_names import DocumentReferenceMetadataFields from enums.upload_status import UploadStatus -from models.bulk_upload_status import BulkUploadReport +from models.bulk_upload_report import BulkUploadReport from models.nhs_document_reference import NHSDocumentReference from models.staging_metadata import StagingMetadata from services.base.dynamo_service import DynamoDBService diff --git a/lambdas/services/bulk_upload_report_service.py b/lambdas/services/bulk_upload_report_service.py index 6e46fb940..422118ac9 100644 --- a/lambdas/services/bulk_upload_report_service.py +++ b/lambdas/services/bulk_upload_report_service.py @@ -1,16 +1,16 @@ import csv import datetime import os -from typing import Optional from boto3.dynamodb.conditions import Attr from enums.metadata_report import MetadataReport -from enums.patient_ods_inactive_status import PatientOdsInactiveStatus -from enums.upload_status import UploadStatus -from models.bulk_upload_status import OdsReport +from models.bulk_upload_report import BulkUploadReport +from models.bulk_upload_report_output import OdsReport, SummaryReport +from pydantic import ValidationError from services.base.dynamo_service import DynamoDBService from services.base.s3_service import S3Service from utils.audit_logging_setup import LoggingService +from utils.utilities import generate_date_folder_name logger = LoggingService(__name__) @@ -20,6 +20,8 @@ def __init__(self): self.db_service = DynamoDBService() self.s3_service = S3Service() self.reports_bucket = os.getenv("STATISTICAL_REPORTS_BUCKET") + self.generated_on = "" + self.s3_key_prefix = "" def report_handler(self): start_time, end_time = self.get_times_for_scan() @@ -32,199 +34,279 @@ def report_handler(self): f"Bulk upload reports for {str(start_time)} to {str(end_time)}.csv" ) - generated_at = end_time.strftime("%Y%m%d") - - ods_reports: list[OdsReport] = self.generate_ods_reports( - report_data, generated_at - ) + ods_reports: list[OdsReport] = self.generate_ods_reports(report_data) logger.info("Successfully processed daily ODS reports") - - self.generate_summary_report(ods_reports, generated_at) + self.generate_summary_report(ods_reports) logger.info("Successfully processed daily summary report") - self.generate_daily_report(report_data, start_time, end_time) logger.info("Successfully processed daily report") - + self.generate_success_report(ods_reports) + logger.info("Successfully processed success report") + self.generate_suspended_report(ods_reports) + logger.info("Successfully processed suspended report") + self.generate_deceased_report(ods_reports) + logger.info("Successfully processed deceased report") + self.generate_restricted_report(ods_reports) + logger.info("Successfully processed restricted report") + self.generate_rejected_report(ods_reports) + logger.info("Successfully processed rejected report") else: logger.info("No data found, no new report file to upload") def generate_ods_reports( - self, report_data: list[dict], generated_at: str + self, report_data: list[BulkUploadReport] ) -> list[OdsReport]: ods_reports: list[OdsReport] = [] grouped_ods_data = {} for item in report_data: - uploader_ods_code = item.get(MetadataReport.UploaderOdsCode, "") + uploader_ods_code = item.uploader_ods_code if uploader_ods_code is not None and item is not None: grouped_ods_data.setdefault(uploader_ods_code, []).append(item) - for uploader_ods_code, ods_data in grouped_ods_data.items(): + for uploader_ods_code, ods_report_items in grouped_ods_data.items(): ods_report = self.generate_individual_ods_report( - uploader_ods_code, ods_data, generated_at + uploader_ods_code, ods_report_items ) ods_reports.append(ods_report) return ods_reports def generate_individual_ods_report( - self, uploader_ods_code: str, ods_report_data: list[dict], generated_at: str + self, uploader_ods_code: str, ods_report_items: list[BulkUploadReport] ) -> OdsReport: - total_successful = set() - total_registered_elsewhere = set() - total_suspended = set() - failures_per_patient = {} - unique_failures = {} - - logger.info(f"Generating ODS report file for {uploader_ods_code}") - - for item in ods_report_data: - nhs_number = item.get(MetadataReport.NhsNumber) - upload_status = item.get(MetadataReport.UploadStatus) - pds_ods_code = item.get(MetadataReport.PdsOdsCode) - time_stamp = item.get(MetadataReport.Timestamp) - - uploader_ods_code = item.get( - MetadataReport.UploaderOdsCode, "Could not find uploader ODS code" - ) - - if upload_status == UploadStatus.COMPLETE: - total_successful.add(nhs_number) - - if pds_ods_code == PatientOdsInactiveStatus.SUSPENDED: - total_suspended.add(nhs_number) - - elif uploader_ods_code != pds_ods_code: - total_registered_elsewhere.add(nhs_number) - - elif upload_status == UploadStatus.FAILED: - failure_reason = item.get(MetadataReport.FailureReason, "") - if ( - failure_reason and nhs_number not in failures_per_patient - ) or failures_per_patient[nhs_number].get( - MetadataReport.Timestamp - ) < time_stamp: - failures_per_patient.update( - { - nhs_number: { - MetadataReport.FailureReason: failure_reason, - MetadataReport.Timestamp: time_stamp, - } - } - ) - - patient_to_remove = [] - for patient in failures_per_patient: - if patient in total_successful: - patient_to_remove.append(patient) - for patient in patient_to_remove: - failures_per_patient.pop(patient) - - for patient_data in failures_per_patient.values(): - reason = patient_data.get(MetadataReport.FailureReason) - if reason not in unique_failures: - unique_failures[reason] = 0 - unique_failures[reason] += 1 - ods_report = OdsReport( + generated_at=self.generated_on, + report_items=ods_report_items, uploader_ods_code=uploader_ods_code, - total_successful=len(total_successful), - total_registered_elsewhere=len(total_registered_elsewhere), - total_suspended=len(total_suspended), - failure_reasons=unique_failures, ) - extra_row_values = [] - for failure_reason, count in ods_report.failure_reasons.items(): - extra_row_values.append(["FailureReason", failure_reason, count]) - - file_key = f"daily_statistical_report_bulk_upload_summary_{generated_at}_uploaded_by_{uploader_ods_code}.csv" + file_name = f"daily_statistical_report_bulk_upload_ods_summary_{self.generated_on}_uploaded_by_{uploader_ods_code}.csv" self.write_summary_data_to_csv( - file_name=file_key, - total_successful=ods_report.total_successful, - total_registered_elsewhere=ods_report.total_registered_elsewhere, - total_suspended=ods_report.total_suspended, - extra_rows=extra_row_values, + file_name=file_name, + total_successful=ods_report.get_total_successful_count(), + total_registered_elsewhere=ods_report.get_total_registered_elsewhere_count(), + total_suspended=ods_report.get_total_suspended_count(), + extra_rows=ods_report.get_unsuccessful_reasons_data_rows(), ) logger.info(f"Uploading ODS report file for {uploader_ods_code} to S3") self.s3_service.upload_file( s3_bucket_name=self.reports_bucket, - file_key=file_key, - file_name=f"/tmp/{file_key}", + file_key=f"{self.s3_key_prefix}/{file_name}", + file_name=f"/tmp/{file_name}", ) return ods_report - def generate_summary_report(self, ods_reports: list[OdsReport], generated_at: str): - total_successful = 0 - total_registered_elsewhere = 0 - total_suspended = 0 - ods_code_totals = {} - extra_row_values = [] + def generate_summary_report(self, ods_reports: list[OdsReport]): + summary_report = SummaryReport( + generated_at=self.generated_on, ods_reports=ods_reports + ) + + file_name = ( + f"daily_statistical_report_bulk_upload_summary_{self.generated_on}.csv" + ) + + self.write_summary_data_to_csv( + file_name=file_name, + total_successful=summary_report.get_total_successful_count(), + total_registered_elsewhere=summary_report.get_total_registered_elsewhere_count(), + total_suspended=summary_report.get_total_suspended_count(), + total_deceased=summary_report.get_total_deceased_count(), + total_restricted=summary_report.get_total_restricted_count(), + extra_rows=summary_report.success_summary + summary_report.reason_summary, + ) + + logger.info("Uploading daily summary report file to S3") + self.s3_service.upload_file( + s3_bucket_name=self.reports_bucket, + file_key=f"{self.s3_key_prefix}/{file_name}", + file_name=f"/tmp/{file_name}", + ) + def generate_daily_report( + self, report_data: list[BulkUploadReport], start_time: str, end_time: str + ): + file_name = f"daily_statistical_report_entire_bulk_upload_{str(start_time)}_to_{str(end_time)}.csv" + + self.write_items_to_csv(report_data, f"/tmp/{file_name}") + + logger.info("Uploading daily report file to S3") + self.s3_service.upload_file( + s3_bucket_name=self.reports_bucket, + file_key=f"{self.s3_key_prefix}/{file_name}", + file_name=f"/tmp/{file_name}", + ) + + def generate_success_report(self, ods_reports: list[OdsReport]): + file_name = ( + f"daily_statistical_report_bulk_upload_success_{self.generated_on}.csv" + ) + + headers = [ + MetadataReport.NhsNumber, + MetadataReport.UploaderOdsCode, + MetadataReport.Date, + ] + data_rows = [] for report in ods_reports: - total_successful += report.total_successful - total_registered_elsewhere += report.total_registered_elsewhere - total_suspended += report.total_suspended - ods_code_totals[report.uploader_ods_code] = report.total_successful + for patient in report.get_sorted(report.total_successful): + data_rows.append( + [str(patient[0]), str(report.uploader_ods_code), str(patient[1])] + ) + + self.write_additional_report_items_to_csv( + file_name=file_name, headers=headers, rows_to_write=data_rows + ) + + logger.info("Uploading daily success report file to S3") + self.s3_service.upload_file( + s3_bucket_name=self.reports_bucket, + file_key=f"{self.s3_key_prefix}/{file_name}", + file_name=f"/tmp/{file_name}", + ) + + def generate_suspended_report(self, ods_reports: list[OdsReport]): + file_name = ( + f"daily_statistical_report_bulk_upload_suspended_{self.generated_on}.csv" + ) + + headers = [ + MetadataReport.NhsNumber, + MetadataReport.UploaderOdsCode, + MetadataReport.Date, + ] + data_rows = [] + for report in ods_reports: + for patient in report.get_sorted(report.total_suspended): + data_rows.append( + [str(patient[0]), str(report.uploader_ods_code), str(patient[1])] + ) - for failure_reason, count in report.failure_reasons.items(): - extra_row_values.append( + self.write_additional_report_items_to_csv( + file_name=file_name, headers=headers, rows_to_write=data_rows + ) + + logger.info("Uploading daily suspended report file to S3") + self.s3_service.upload_file( + s3_bucket_name=self.reports_bucket, + file_key=f"{self.s3_key_prefix}/{file_name}", + file_name=f"/tmp/{file_name}", + ) + + def generate_deceased_report(self, ods_reports: list[OdsReport]): + file_name = ( + f"daily_statistical_report_bulk_upload_deceased_{self.generated_on}.csv" + ) + + headers = [ + MetadataReport.NhsNumber, + MetadataReport.UploaderOdsCode, + MetadataReport.Date, + MetadataReport.FailureReason, + ] + data_rows = [] + for report in ods_reports: + for patient in report.get_sorted(report.total_deceased): + data_rows.append( [ - f"FailureReason for {report.uploader_ods_code}", - failure_reason, - count, + str(patient[0]), + str(report.uploader_ods_code), + str(patient[1]), + str(patient[2]), ] ) - if ods_code_totals: - for uploader_ods_code, count in ods_code_totals.items(): - extra_row_values.append(["Success by ODS", uploader_ods_code, count]) - else: - extra_row_values.append(["Success by ODS", "No ODS codes found", 0]) + self.write_additional_report_items_to_csv( + file_name=file_name, headers=headers, rows_to_write=data_rows + ) + + logger.info("Uploading daily deceased report file to S3") + self.s3_service.upload_file( + s3_bucket_name=self.reports_bucket, + file_key=f"{self.s3_key_prefix}/{file_name}", + file_name=f"/tmp/{file_name}", + ) - file_name = f"daily_statistical_report_bulk_upload_summary_{generated_at}.csv" - file_key = f"daily-reports/{file_name}" + def generate_restricted_report(self, ods_reports: list[OdsReport]): + file_name = ( + f"daily_statistical_report_bulk_upload_restricted_{self.generated_on}.csv" + ) - self.write_summary_data_to_csv( - file_name=file_name, - total_successful=total_successful, - total_registered_elsewhere=total_registered_elsewhere, - total_suspended=total_suspended, - extra_rows=extra_row_values, + headers = [ + MetadataReport.NhsNumber, + MetadataReport.UploaderOdsCode, + MetadataReport.Date, + ] + data_rows = [] + for report in ods_reports: + for patient in report.get_sorted(report.total_restricted): + data_rows.append( + [str(patient[0]), str(report.uploader_ods_code), str(patient[1])] + ) + + self.write_additional_report_items_to_csv( + file_name=file_name, headers=headers, rows_to_write=data_rows ) - logger.info("Uploading daily report file to S3") + logger.info("Uploading daily restricted report file to S3") self.s3_service.upload_file( s3_bucket_name=self.reports_bucket, - file_key=file_key, + file_key=f"{self.s3_key_prefix}/{file_name}", file_name=f"/tmp/{file_name}", ) - def generate_daily_report(self, report_data: list, start_time: str, end_time: str): - file_name = f"Bulk upload report for {str(start_time)} to {str(end_time)}.csv" + def generate_rejected_report(self, ods_reports: list[OdsReport]): + file_name = ( + f"daily_statistical_report_bulk_upload_rejected_{self.generated_on}.csv" + ) - self.write_items_to_csv(report_data, f"/tmp/{file_name}") + headers = [ + MetadataReport.NhsNumber, + MetadataReport.UploaderOdsCode, + MetadataReport.Date, + MetadataReport.FailureReason, + ] - logger.info("Uploading daily report file to S3") + data_rows = [] + for report in ods_reports: + for nhs_number, report_item in report.failures_per_patient.items(): + data_rows.append( + [ + nhs_number, + report_item[MetadataReport.UploaderOdsCode], + report_item[MetadataReport.Date], + report_item[MetadataReport.FailureReason], + ] + ) + + self.write_additional_report_items_to_csv( + file_name=file_name, headers=headers, rows_to_write=data_rows + ) + + logger.info("Uploading daily rejected report file to S3") self.s3_service.upload_file( s3_bucket_name=self.reports_bucket, - file_key=f"daily-reports/{file_name}", + file_key=f"{self.s3_key_prefix}/{file_name}", file_name=f"/tmp/{file_name}", ) @staticmethod - def write_items_to_csv(items: list, csv_file_path: str): + def write_items_to_csv(items: list[BulkUploadReport], csv_file_path: str): logger.info("Writing scan results to csv file") with open(csv_file_path, "w") as output_file: field_names = MetadataReport.list() dict_writer_object = csv.DictWriter(output_file, fieldnames=field_names) dict_writer_object.writeheader() for item in items: - dict_writer_object.writerow(item) + dict_writer_object.writerow( + item.model_dump( + exclude={str(MetadataReport.ID).lower()}, by_alias=True + ) + ) @staticmethod def write_summary_data_to_csv( @@ -232,7 +314,9 @@ def write_summary_data_to_csv( total_successful: int, total_registered_elsewhere: int, total_suspended: int, - extra_rows: [], + total_deceased: int = None, + total_restricted: int = None, + extra_rows: list = [], ): with open(f"/tmp/{file_name}", "w", newline="") as output_file: writer = csv.writer(output_file) @@ -246,23 +330,32 @@ def write_summary_data_to_csv( total_registered_elsewhere, ] ) - writer.writerow(["Total", "Suspended", total_suspended]) + writer.writerow(["Total", "Successful - Suspended", total_suspended]) + + if total_deceased: + writer.writerow(["Total", "Successful - Deceased", total_deceased]) + + if total_restricted: + writer.writerow(["Total", "Successful - Restricted", total_restricted]) for row in extra_rows: writer.writerow(row) - def group_by_uploader_ods_code(self, report_data): - ods_reports = {} - for item in report_data: - uploader_ods_code = item.get(MetadataReport.UploaderOdsCode, "Unknown") - if uploader_ods_code not in ods_reports: - ods_reports[uploader_ods_code] = [] - ods_reports[uploader_ods_code].append(item) - return ods_reports + @staticmethod + def write_additional_report_items_to_csv( + file_name: str, + headers: list[str] = [], + rows_to_write: list[list[str]] = [], + ): + with open(f"/tmp/{file_name}", "w", newline="") as output_file: + writer = csv.writer(output_file) + writer.writerow(headers) + for row in rows_to_write: + writer.writerow(row) def get_dynamodb_report_items( self, start_timestamp: int, end_timestamp: int - ) -> Optional[list]: + ) -> list[BulkUploadReport]: logger.info("Starting Scan on DynamoDB table") bulk_upload_table_name = os.getenv("BULK_UPLOAD_DYNAMODB_NAME") filter_time = Attr("Timestamp").gt(start_timestamp) & Attr("Timestamp").lt( @@ -273,7 +366,7 @@ def get_dynamodb_report_items( ) if "Items" not in db_response: - return None + return [] items = db_response["Items"] while "LastEvaluatedKey" in db_response: db_response = self.db_service.scan_table( @@ -283,10 +376,17 @@ def get_dynamodb_report_items( ) if db_response["Items"]: items.extend(db_response["Items"]) - return items - @staticmethod - def get_times_for_scan() -> tuple[datetime, datetime]: + validated_items = [] + for item in items: + try: + validated_items.append(BulkUploadReport.model_validate(item)) + except ValidationError as e: + logger.error(f"Failed to parse bulk update report dynamo item: {e}") + + return validated_items + + def get_times_for_scan(self) -> tuple[datetime, datetime]: current_time = datetime.datetime.now() end_report_time = datetime.time(7, 00, 00, 0) today_date = datetime.datetime.today() @@ -294,4 +394,9 @@ def get_times_for_scan() -> tuple[datetime, datetime]: if current_time < end_timestamp: end_timestamp -= datetime.timedelta(days=1) start_timestamp = end_timestamp - datetime.timedelta(days=1) + + self.generated_on = start_timestamp.strftime("%Y%m%d") + date_folder_name = generate_date_folder_name(self.generated_on) + self.s3_key_prefix = f"bulk-upload-reports/{date_folder_name}" + return start_timestamp, end_timestamp diff --git a/lambdas/services/statistical_report_service.py b/lambdas/services/statistical_report_service.py index 18f8837b1..85c8a4767 100644 --- a/lambdas/services/statistical_report_service.py +++ b/lambdas/services/statistical_report_service.py @@ -18,6 +18,7 @@ from services.base.s3_service import S3Service from utils.audit_logging_setup import LoggingService from utils.exceptions import StatisticDataNotFoundException +from utils.utilities import generate_date_folder_name logger = LoggingService(__name__) @@ -159,7 +160,7 @@ def summarise_application_data( pl.concat_list("active_user_ids_hashed") .flatten() .unique() - .apply(lambda col: str(col.sort().to_list())) + .map_elements(lambda col: str(col.sort().to_list())) .alias("unique_active_user_ids_hashed"), pl.concat_list("active_user_ids_hashed") .flatten() @@ -218,13 +219,20 @@ def rename_snakecase_columns(column_name: str) -> str: def store_report_to_s3(self, weekly_summary: pl.DataFrame) -> None: logger.info("Saving the weekly report as .csv") file_name = f"statistical_report_{self.report_period}.csv" + end_date = self.dates_to_collect[-1] + date_folder_name = generate_date_folder_name(end_date) + temp_folder = tempfile.mkdtemp() local_file_path = os.path.join(temp_folder, file_name) try: weekly_summary.write_csv(local_file_path) logger.info("Uploading the csv report to S3 bucket...") - self.s3_service.upload_file(local_file_path, self.reports_bucket, file_name) + self.s3_service.upload_file( + s3_bucket_name=self.reports_bucket, + file_key=f"statistic-reports/{date_folder_name}/{file_name}", + file_name=local_file_path, + ) logger.info("The weekly report is stored in s3 bucket.") logger.info(f"File name: {file_name}") diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/dynamo_responses.py b/lambdas/tests/unit/helpers/data/bulk_upload/dynamo_responses.py new file mode 100644 index 000000000..27a152645 --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/dynamo_responses.py @@ -0,0 +1,261 @@ +from copy import deepcopy + +from models.bulk_upload_report import BulkUploadReport +from tests.unit.conftest import TEST_UUID + +TEST_UPLOADER_ODS_1 = "Y12345" +TEST_UPLOADER_ODS_2 = "Z12345" + +MOCK_REPORT_ITEMS_FOR_UPLOADER_1 = [ + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000000", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": "SUSP", + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000001", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "FailureReason": "Patient is deceased - INFORMAL", + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": "DECE", + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000002", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": "REST", + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000003", + "FilePath": "/0000000000/1of1_Lloyd_George_Record_[NAME]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, + { + "Timestamp": 1688395681, + "Date": "2012-01-13", + "NhsNumber": "9000000003", + "FilePath": "/0000000000/1of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "FailureReason": "Could not find the given patient on PDS", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000004", + "FilePath": "/0000000000/1of1_Lloyd_George_Record_[NAME]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_2, + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000004", + "FilePath": "/0000000000/1of1_Lloyd_George_Record_[NAME]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_2, + }, + { + "Timestamp": 1688395681, + "Date": "2012-01-13", + "NhsNumber": "9000000005", + "FilePath": "/0000000000/1of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "FailureReason": "Could not find the given patient on PDS", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, + { + "Timestamp": 1688395681, + "Date": "2012-01-13", + "NhsNumber": "9000000006", + "FilePath": "/0000000000/1of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "FailureReason": "Could not find the given patient on PDS", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000006", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "FailureReason": "Lloyd George file already exists", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, + { + "Timestamp": 1688395681, + "Date": "2012-01-13", + "NhsNumber": "9000000007", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + "FailureReason": "Lloyd George file already exists", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, +] + +MOCK_REPORT_ITEMS_FOR_UPLOADER_2 = [ + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000009", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": "SUSP", + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000010", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "FailureReason": "Patient is deceased - FORMAL", + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": "DECE", + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000011", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": "REST", + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000012", + "FilePath": "/0000000000/1of1_Lloyd_George_Record_[NAME]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000012", + "FilePath": "/0000000000/1of1_Lloyd_George_Record_[NAME]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_1, + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000013", + "FilePath": "/0000000000/1of1_Lloyd_George_Record_[NAME]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "UploadStatus": "complete", + "ID": TEST_UUID, + "PdsOdsCode": "Z12345", + }, + { + "Timestamp": 1688395681, + "Date": "2012-01-13", + "NhsNumber": "9000000014", + "FilePath": "/0000000000/1of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "FailureReason": "Could not find the given patient on PDS", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_2, + }, + { + "Timestamp": 1688395681, + "Date": "2012-01-13", + "NhsNumber": "9000000015", + "FilePath": "/0000000000/1of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "FailureReason": "Could not find the given patient on PDS", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_2, + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000015", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "FailureReason": "Lloyd George file already exists", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_2, + }, + { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "NhsNumber": "9000000016", + "FilePath": "/0000000000/2of2_Lloyd_George_Record_[NAME_2]_[0000000000]_[DOB].pdf", + "UploaderOdsCode": TEST_UPLOADER_ODS_2, + "FailureReason": "Lloyd George file already exists", + "UploadStatus": "failed", + "ID": TEST_UUID, + "PdsOdsCode": TEST_UPLOADER_ODS_2, + }, +] + +MOCK_REPORT_RESPONSE_ALL = { + "Items": MOCK_REPORT_ITEMS_FOR_UPLOADER_1 + MOCK_REPORT_ITEMS_FOR_UPLOADER_2, + "Count": 4, + "ScannedCount": 4, +} + +MOCK_REPORT_RESPONSE_ALL_WITH_LAST_KEY = deepcopy(MOCK_REPORT_RESPONSE_ALL) +MOCK_REPORT_RESPONSE_ALL_WITH_LAST_KEY.update( + { + "LastEvaluatedKey": { + "FilePath": "/9000000010/2of2_Lloyd_George_Record_[NAME_2]_[9000000010]_[DOB].pdf" + } + } +) + +MOCK_REPORT_ITEMS_ALL = [ + BulkUploadReport.model_validate(item) for item in MOCK_REPORT_RESPONSE_ALL["Items"] +] + +MOCK_REPORT_ITEMS_UPLOADER_1 = [ + BulkUploadReport.model_validate(item) for item in MOCK_REPORT_ITEMS_FOR_UPLOADER_1 +] + +MOCK_REPORT_ITEMS_UPLOADER_2 = [ + BulkUploadReport.model_validate(item) for item in MOCK_REPORT_ITEMS_FOR_UPLOADER_2 +] diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_summary_report.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_summary_report.csv index 5eafa7686..caa0f602e 100644 --- a/lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_summary_report.csv +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_bulk_upload_summary_report.csv @@ -1,8 +1,12 @@ Type,Description,Count -Total,Total Successful,2 -Total,Successful - Registered Elsewhere,0 -Total,Suspended,0 -FailureReason for Y12345,File name not matching Lloyd George naming convention,1 -FailureReason for TEST,Could not find the given patient on PDS,1 -Success by ODS,Y12345,1 -Success by ODS,TEST,1 +Total,Total Successful,10 +Total,Successful - Registered Elsewhere,2 +Total,Successful - Suspended,2 +Total,Successful - Deceased,2 +Total,Successful - Restricted,2 +Success by ODS,Y12345,5 +Success by ODS,Z12345,5 +FailureReason for Y12345,Could not find the given patient on PDS,2 +FailureReason for Y12345,Lloyd George file already exists,1 +FailureReason for Z12345,Could not find the given patient on PDS,2 +FailureReason for Z12345,Lloyd George file already exists,1 diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_deceased_report.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_deceased_report.csv new file mode 100644 index 000000000..85be1afa7 --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_deceased_report.csv @@ -0,0 +1,3 @@ +NhsNumber,UploaderOdsCode,Date,FailureReason +9000000001,Y12345,2012-01-13,Patient is deceased - INFORMAL +9000000010,Z12345,2012-01-13,Patient is deceased - FORMAL diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_1.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_1.csv new file mode 100644 index 000000000..3c52a114e --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_1.csv @@ -0,0 +1,6 @@ +Type,Description,Count +Total,Total Successful,5 +Total,Successful - Registered Elsewhere,1 +Total,Successful - Suspended,1 +FailureReason,Could not find the given patient on PDS,2 +FailureReason,Lloyd George file already exists,1 diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_2.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_2.csv new file mode 100644 index 000000000..3c52a114e --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_ods_report_for_uploader_2.csv @@ -0,0 +1,6 @@ +Type,Description,Count +Total,Total Successful,5 +Total,Successful - Registered Elsewhere,1 +Total,Successful - Suspended,1 +FailureReason,Could not find the given patient on PDS,2 +FailureReason,Lloyd George file already exists,1 diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_rejected_report.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_rejected_report.csv new file mode 100644 index 000000000..9c447ef86 --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_rejected_report.csv @@ -0,0 +1,7 @@ +NhsNumber,UploaderOdsCode,Date,FailureReason +9000000005,Y12345,2012-01-13,Could not find the given patient on PDS +9000000006,Y12345,2012-01-13,Could not find the given patient on PDS +9000000007,Y12345,2012-01-13,Lloyd George file already exists +9000000014,Z12345,2012-01-13,Could not find the given patient on PDS +9000000015,Z12345,2012-01-13,Could not find the given patient on PDS +9000000016,Z12345,2012-01-13,Lloyd George file already exists diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_restricted_report.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_restricted_report.csv new file mode 100644 index 000000000..e9f735913 --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_restricted_report.csv @@ -0,0 +1,3 @@ +NhsNumber,UploaderOdsCode,Date +9000000002,Y12345,2012-01-13 +9000000011,Z12345,2012-01-13 diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_success_report.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_success_report.csv new file mode 100644 index 000000000..a466a5300 --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_success_report.csv @@ -0,0 +1,11 @@ +NhsNumber,UploaderOdsCode,Date +9000000000,Y12345,2012-01-13 +9000000001,Y12345,2012-01-13 +9000000002,Y12345,2012-01-13 +9000000003,Y12345,2012-01-13 +9000000004,Y12345,2012-01-13 +9000000009,Z12345,2012-01-13 +9000000010,Z12345,2012-01-13 +9000000011,Z12345,2012-01-13 +9000000012,Z12345,2012-01-13 +9000000013,Z12345,2012-01-13 diff --git a/lambdas/tests/unit/helpers/data/bulk_upload/expected_suspended_report.csv b/lambdas/tests/unit/helpers/data/bulk_upload/expected_suspended_report.csv new file mode 100644 index 000000000..64a45c017 --- /dev/null +++ b/lambdas/tests/unit/helpers/data/bulk_upload/expected_suspended_report.csv @@ -0,0 +1,3 @@ +NhsNumber,UploaderOdsCode,Date +9000000000,Y12345,2012-01-13 +9000000009,Z12345,2012-01-13 diff --git a/lambdas/tests/unit/models/test_bulk_upload_status.py b/lambdas/tests/unit/models/test_bulk_upload_report.py similarity index 95% rename from lambdas/tests/unit/models/test_bulk_upload_status.py rename to lambdas/tests/unit/models/test_bulk_upload_report.py index 0f8694dd8..89004944e 100644 --- a/lambdas/tests/unit/models/test_bulk_upload_status.py +++ b/lambdas/tests/unit/models/test_bulk_upload_report.py @@ -1,6 +1,6 @@ from enums.upload_status import UploadStatus from freezegun import freeze_time -from models.bulk_upload_status import BulkUploadReport +from models.bulk_upload_report import BulkUploadReport from tests.unit.conftest import TEST_UUID MOCK_DATA_COMPLETE_UPLOAD = { @@ -30,6 +30,8 @@ def test_create_successful_upload(): expected = MOCK_DATA_COMPLETE_UPLOAD + expected.update({"FailureReason": ""}) + actual = BulkUploadReport( ID=TEST_UUID, nhs_number="9000000009", @@ -64,6 +66,8 @@ def test_create_failed_upload(): @freeze_time("2023-10-30 10:25:00") def test_successful_upload_ids_and_timestamp_are_auto_populated_if_not_given(mock_uuid): expected = MOCK_DATA_COMPLETE_UPLOAD + expected.update({"FailureReason": ""}) + actual = BulkUploadReport( nhs_number="9000000009", file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019].pdf", diff --git a/lambdas/tests/unit/models/test_bulk_upload_report_output.py b/lambdas/tests/unit/models/test_bulk_upload_report_output.py new file mode 100644 index 000000000..1a2f98e8d --- /dev/null +++ b/lambdas/tests/unit/models/test_bulk_upload_report_output.py @@ -0,0 +1,374 @@ +from datetime import datetime + +from enums.metadata_report import MetadataReport +from enums.upload_status import UploadStatus +from freezegun import freeze_time +from models.bulk_upload_report import BulkUploadReport +from models.bulk_upload_report_output import OdsReport, ReportBase, SummaryReport +from tests.unit.helpers.data.bulk_upload.dynamo_responses import ( + MOCK_REPORT_ITEMS_UPLOADER_1, + MOCK_REPORT_ITEMS_UPLOADER_2, + TEST_UPLOADER_ODS_1, + TEST_UPLOADER_ODS_2, +) + + +@freeze_time("2024-01-01 12:00:00") +def get_timestamp(): + return datetime.now().strftime("%Y%m%d") + + +def test_report_base_get_total_successful_nhs_numbers_returns_nhs_numbers(): + base = ReportBase(generated_at=get_timestamp()) + base.total_successful = { + ("9000000000", "2012-01-13"), + ("9000000003", "2012-01-13"), + ("9000000001", "2012-01-13"), + ("9000000002", "2012-01-13"), + ("9000000004", "2012-01-13"), + } + + expected = ["9000000000", "9000000003", "9000000001", "9000000002", "9000000004"] + + actual = base.get_total_successful_nhs_numbers() + + assert sorted(expected) == sorted(actual) + + +def test_report_base_get_total_successful_nhs_numbers_returns_empty(): + base = ReportBase(generated_at=get_timestamp()) + + expected = [] + + actual = base.get_total_successful_nhs_numbers() + + assert expected == actual + + +def test_report_base_get_sorted_sorts_successfully(): + to_sort = { + ("9000000000", "2012-01-13"), + ("9000000003", "2012-01-13"), + ("9000000001", "2012-01-13"), + ("9000000002", "2012-01-13"), + ("9000000004", "2012-01-13"), + } + + expected = [ + ("9000000000", "2012-01-13"), + ("9000000001", "2012-01-13"), + ("9000000002", "2012-01-13"), + ("9000000003", "2012-01-13"), + ("9000000004", "2012-01-13"), + ] + + actual = OdsReport.get_sorted(to_sort) + assert actual == expected + + +def test_report_base_get_sorted_returns_empty(): + to_sort = set() + + expected = [] + + actual = OdsReport.get_sorted(to_sort) + assert actual == expected + + +def test_ods_report_populate_report_populates_successfully(): + expected = { + "generated_at": get_timestamp(), + "total_successful": { + ("9000000000", "2012-01-13"), + ("9000000001", "2012-01-13"), + ("9000000002", "2012-01-13"), + ("9000000003", "2012-01-13"), + ("9000000004", "2012-01-13"), + }, + "total_registered_elsewhere": {("9000000004", "2012-01-13")}, + "total_suspended": {("9000000000", "2012-01-13")}, + "total_deceased": { + ("9000000001", "2012-01-13", "Patient is deceased - INFORMAL") + }, + "total_restricted": {("9000000002", "2012-01-13")}, + "report_items": MOCK_REPORT_ITEMS_UPLOADER_1, + "failures_per_patient": { + "9000000005": { + "Date": "2012-01-13", + "FailureReason": "Could not find the given patient on PDS", + "Timestamp": 1688395681, + "UploaderOdsCode": "Y12345", + }, + "9000000006": { + "Date": "2012-01-13", + "FailureReason": "Could not find the given patient on PDS", + "Timestamp": 1688395681, + "UploaderOdsCode": "Y12345", + }, + "9000000007": { + "Date": "2012-01-13", + "FailureReason": "Lloyd George file already exists", + "Timestamp": 1688395681, + "UploaderOdsCode": "Y12345", + }, + }, + "unique_failures": { + "Could not find the given patient on PDS": 2, + "Lloyd George file already exists": 1, + }, + "uploader_ods_code": TEST_UPLOADER_ODS_1, + } + + actual = OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + MOCK_REPORT_ITEMS_UPLOADER_1, + ).__dict__ + + assert actual == expected + + +def test_ods_report_process_failed_report_item_handles_failures(): + old_time_stamp = 1698661500 + new_time_stamp = 1698661501 + old_failure_reason = "old reason" + newest_failure_reason = "new reason" + + test_items = [ + BulkUploadReport( + nhs_number="9000000009", + timestamp=old_time_stamp, + date="2023-10-30", + upload_status=UploadStatus.FAILED, + file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019].pdf", + failure_reason=old_failure_reason, + pds_ods_code=TEST_UPLOADER_ODS_1, + uploader_ods_code=TEST_UPLOADER_ODS_1, + ) + ] + + new_failed_item = BulkUploadReport( + nhs_number="9000000009", + timestamp=new_time_stamp, + date="2023-10-30", + upload_status=UploadStatus.FAILED, + file_path="/9000000009/1of1_Lloyd_George_Record_[Joe Bloggs]_[9000000009]_[25-12-2019].pdf", + failure_reason=newest_failure_reason, + pds_ods_code=TEST_UPLOADER_ODS_1, + uploader_ods_code=TEST_UPLOADER_ODS_1, + ) + + expected = { + "9000000009": { + "Date": "2023-10-30", + "FailureReason": old_failure_reason, + "Timestamp": old_time_stamp, + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + } + } + + report = OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + test_items, + ) + report.report_items = test_items + + actual = report.failures_per_patient + assert actual == expected + + report.process_failed_report_item(new_failed_item) + expected = { + "9000000009": { + "Date": "2023-10-30", + "FailureReason": newest_failure_reason, + "Timestamp": new_time_stamp, + "UploaderOdsCode": TEST_UPLOADER_ODS_1, + } + } + + actual = report.failures_per_patient + assert actual == expected + + +def test_ods_report_get_unsuccessful_reasons_data_rows_returns_correct_rows(): + report = OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + MOCK_REPORT_ITEMS_UPLOADER_1, + ) + + expected = [ + [MetadataReport.FailureReason, "Could not find the given patient on PDS", 2], + [MetadataReport.FailureReason, "Lloyd George file already exists", 1], + ] + + actual = report.get_unsuccessful_reasons_data_rows() + + assert actual == expected + + +def test_ods_report_populate_report_empty_list_populates_successfully(): + expected = { + "generated_at": get_timestamp(), + "total_successful": set(), + "total_registered_elsewhere": set(), + "total_suspended": set(), + "total_deceased": set(), + "total_restricted": set(), + "report_items": [], + "failures_per_patient": {}, + "unique_failures": {}, + "uploader_ods_code": TEST_UPLOADER_ODS_1, + } + + actual = OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + [], + ).__dict__ + + assert actual == expected + + +def test_ods_report_populate_report_returns_correct_statistics(): + actual = OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + MOCK_REPORT_ITEMS_UPLOADER_1, + ) + + assert actual.get_total_successful_count() == 5 + assert actual.get_total_deceased_count() == 1 + assert actual.get_total_suspended_count() == 1 + assert actual.get_total_restricted_count() == 1 + assert actual.get_total_registered_elsewhere_count() == 1 + + +def test_ods_report_populate_report_empty_list_returns_correct_statistics(): + actual = OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + [], + ) + + assert actual.get_total_successful_count() == 0 + assert actual.get_total_deceased_count() == 0 + assert actual.get_total_suspended_count() == 0 + assert actual.get_total_restricted_count() == 0 + assert actual.get_total_registered_elsewhere_count() == 0 + + +def test_summary_report_populate_report_populates_successfully(): + test_uploader_reports = [ + OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + MOCK_REPORT_ITEMS_UPLOADER_1, + ), + OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_2, + MOCK_REPORT_ITEMS_UPLOADER_2, + ), + ] + + expected = { + "generated_at": get_timestamp(), + "total_successful": { + ("9000000000", "2012-01-13"), + ("9000000001", "2012-01-13"), + ("9000000002", "2012-01-13"), + ("9000000003", "2012-01-13"), + ("9000000004", "2012-01-13"), + ("9000000009", "2012-01-13"), + ("9000000010", "2012-01-13"), + ("9000000011", "2012-01-13"), + ("9000000012", "2012-01-13"), + ("9000000013", "2012-01-13"), + }, + "total_registered_elsewhere": { + ("9000000004", "2012-01-13"), + ("9000000012", "2012-01-13"), + }, + "total_suspended": {("9000000000", "2012-01-13"), ("9000000009", "2012-01-13")}, + "total_deceased": { + ("9000000001", "2012-01-13", "Patient is deceased - INFORMAL"), + ("9000000010", "2012-01-13", "Patient is deceased - FORMAL"), + }, + "total_restricted": { + ("9000000002", "2012-01-13"), + ("9000000011", "2012-01-13"), + }, + "ods_reports": test_uploader_reports, + "success_summary": [ + ["Success by ODS", "Y12345", 5], + ["Success by ODS", "Z12345", 5], + ], + "reason_summary": [ + ["FailureReason for Y12345", "Could not find the given patient on PDS", 2], + ["FailureReason for Y12345", "Lloyd George file already exists", 1], + ["FailureReason for Z12345", "Could not find the given patient on PDS", 2], + ["FailureReason for Z12345", "Lloyd George file already exists", 1], + ], + } + + actual = SummaryReport( + generated_at=get_timestamp(), ods_reports=test_uploader_reports + ).__dict__ + + assert actual == expected + + +def test_summary_report_populate_report_empty_reports_objects_populate_successfully(): + test_uploader_reports = [ + OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_1, + [], + ), + OdsReport( + get_timestamp(), + TEST_UPLOADER_ODS_2, + [], + ), + ] + + expected = { + "generated_at": get_timestamp(), + "total_successful": set(), + "total_registered_elsewhere": set(), + "total_suspended": set(), + "total_deceased": set(), + "total_restricted": set(), + "ods_reports": test_uploader_reports, + "success_summary": [ + ["Success by ODS", "Y12345", 0], + ["Success by ODS", "Z12345", 0], + ], + "reason_summary": [], + } + + actual = SummaryReport( + generated_at=get_timestamp(), ods_reports=test_uploader_reports + ).__dict__ + + assert actual == expected + + +def test_summary_report_populate_report_no_report_objects_populate_successfully(): + expected = { + "generated_at": get_timestamp(), + "total_successful": set(), + "total_registered_elsewhere": set(), + "total_suspended": set(), + "total_deceased": set(), + "total_restricted": set(), + "ods_reports": [], + "success_summary": [["Success by ODS", "No ODS codes found", 0]], + "reason_summary": [], + } + + actual = SummaryReport(generated_at=get_timestamp(), ods_reports=[]).__dict__ + + assert actual == expected diff --git a/lambdas/tests/unit/services/test_bulk_upload_report_service.py b/lambdas/tests/unit/services/test_bulk_upload_report_service.py index 5d491623a..fb6ad191c 100644 --- a/lambdas/tests/unit/services/test_bulk_upload_report_service.py +++ b/lambdas/tests/unit/services/test_bulk_upload_report_service.py @@ -10,67 +10,25 @@ MOCK_BULK_REPORT_TABLE_NAME, MOCK_STATISTICS_REPORT_BUCKET_NAME, TEST_CURRENT_GP_ODS, - TEST_UUID, +) +from tests.unit.helpers.data.bulk_upload.dynamo_responses import ( + MOCK_REPORT_ITEMS_ALL, + MOCK_REPORT_ITEMS_UPLOADER_1, + MOCK_REPORT_RESPONSE_ALL, + MOCK_REPORT_RESPONSE_ALL_WITH_LAST_KEY, + TEST_UPLOADER_ODS_1, + TEST_UPLOADER_ODS_2, ) from tests.unit.helpers.data.bulk_upload.test_data import readfile from tests.unit.helpers.data.dynamo_scan_response import ( - EXPECTED_RESPONSE, MOCK_EMPTY_RESPONSE, - MOCK_RESPONSE, - MOCK_RESPONSE_WITH_LAST_KEY, UNEXPECTED_RESPONSE, ) -from tests.unit.models.test_bulk_upload_status import ( - MOCK_DATA_COMPLETE_UPLOAD, - MOCK_DATA_FAILED_UPLOAD, -) +from utils.utilities import generate_date_folder_name MOCK_END_REPORT_TIME = datetime(2012, 1, 14, 7, 0, 0, 0) MOCK_START_REPORT_TIME = datetime(2012, 1, 13, 7, 0, 0, 0) -MOCK_BULK_REPORT_TABLE_RESPONSE = [ - { - "Timestamp": 1688395680, - "Date": "2012-01-13", - "NhsNumber": "9000000011", - "FilePath": "/9000000011/1of1_Lloyd_George_Record_[NAME]_[9000000011]_[DOB].pdf", - "UploaderOdsCode": "TEST", - "UploadStatus": "complete", - "ID": TEST_UUID, - "PdsOdsCode": "TEST", - }, - { - "Timestamp": 1688395680, - "Date": "2012-01-13", - "NhsNumber": "9000000011", - "FilePath": "/9000000011/1of1_Lloyd_George_Record_[NAME]_[9000000011]_[DOB].pdf", - "UploaderOdsCode": "TEST", - "UploadStatus": "complete", - "ID": TEST_UUID, - "PdsOdsCode": "TEST", - }, - { - "Timestamp": 1688395681, - "Date": "2012-01-13", - "NhsNumber": "9000000010", - "FilePath": "/9000000010/1of2_Lloyd_George_Record_[NAME_2]_[9000000010]_[DOB].pdf", - "UploaderOdsCode": "TEST", - "FailureReason": "Could not find the given patient on PDS", - "UploadStatus": "failed", - "ID": TEST_UUID, - "PdsOdsCode": "", - }, - { - "Timestamp": 1688395681, - "Date": "2012-01-13", - "NhsNumber": "9000000010", - "FilePath": "/9000000010/2of2_Lloyd_George_Record_[NAME_2]_[9000000010]_[DOB].pdf", - "UploaderOdsCode": "TEST", - "FailureReason": "Could not find the given patient on PDS", - "UploadStatus": "failed", - "ID": TEST_UUID, - "PdsOdsCode": "", - }, -] +MOCK_TIMESTAMP = MOCK_START_REPORT_TIME.strftime("%Y%m%d") @pytest.fixture @@ -102,12 +60,17 @@ def mock_get_db_with_data(mocker, bulk_upload_report_service): yield mocker.patch.object( bulk_upload_report_service, "get_dynamodb_report_items", - return_value=[MOCK_DATA_COMPLETE_UPLOAD], + return_value=MOCK_REPORT_ITEMS_ALL, ) @pytest.fixture def mock_get_times_for_scan(bulk_upload_report_service, mocker): + mock_date_folder_name = generate_date_folder_name(MOCK_TIMESTAMP) + bulk_upload_report_service.generated_on = MOCK_TIMESTAMP + bulk_upload_report_service.s3_key_prefix = ( + f"bulk-upload-reports/{mock_date_folder_name}" + ) yield mocker.patch.object( bulk_upload_report_service, "get_times_for_scan", @@ -115,9 +78,19 @@ def mock_get_times_for_scan(bulk_upload_report_service, mocker): ) +@pytest.fixture +def mock_filter(mocker): + mock_filter = Attr("Timestamp").gt(MOCK_START_REPORT_TIME) & Attr("Timestamp").lt( + MOCK_END_REPORT_TIME + ) + + mocker.patch("boto3.dynamodb.conditions.And", return_value=mock_filter) + + yield mock_filter + + @freeze_time("2012-01-14 7:20:01") def test_get_time_for_scan_after_7am(bulk_upload_report_service): - ( actual_start_time, actual_end_time, @@ -155,21 +128,20 @@ def test_get_time_for_scan_at_7am(bulk_upload_report_service): assert expected_end_report_time == actual_end_time -def test_get_dynamo_data_2_calls(bulk_upload_report_service): - mock_filter = Attr("Timestamp").gt(MOCK_START_REPORT_TIME) & Attr("Timestamp").lt( - MOCK_END_REPORT_TIME - ) - mock_last_key = {"FileName": "Screenshot 2023-08-15 at 16.17.56.png"} +def test_get_dynamo_data_2_calls(bulk_upload_report_service, mock_filter): + mock_last_key = { + "FilePath": "/9000000010/2of2_Lloyd_George_Record_[NAME_2]_[9000000010]_[DOB].pdf" + } bulk_upload_report_service.db_service.scan_table.side_effect = [ - MOCK_RESPONSE_WITH_LAST_KEY, - MOCK_RESPONSE, + MOCK_REPORT_RESPONSE_ALL_WITH_LAST_KEY, + MOCK_REPORT_RESPONSE_ALL, ] actual = bulk_upload_report_service.get_dynamodb_report_items( - MOCK_START_REPORT_TIME, MOCK_END_REPORT_TIME + int(MOCK_START_REPORT_TIME.timestamp()), int(MOCK_END_REPORT_TIME.timestamp()) ) - assert actual == EXPECTED_RESPONSE * 2 + assert actual == MOCK_REPORT_ITEMS_ALL * 2 assert bulk_upload_report_service.db_service.scan_table.call_count == 2 calls = [ call(MOCK_BULK_REPORT_TABLE_NAME, filter_expression=mock_filter), @@ -182,42 +154,67 @@ def test_get_dynamo_data_2_calls(bulk_upload_report_service): bulk_upload_report_service.db_service.scan_table.assert_has_calls(calls) -def test_get_dynamo_data_with_no_start_key(bulk_upload_report_service): - mock_filter = Attr("Timestamp").gt(MOCK_START_REPORT_TIME) & Attr("Timestamp").lt( - MOCK_END_REPORT_TIME +def test_get_dynamo_data_handles_invalid_dynamo_data( + bulk_upload_report_service, mock_filter, caplog +): + invalid_data = { + "Timestamp": 1688395680, + "Date": "2012-01-13", + "FailureReason": "Lloyd George file already exists", + "UploadStatus": "failed", + } + mock_response = {"Items": [invalid_data, MOCK_REPORT_RESPONSE_ALL["Items"][1]]} + expected_message = "Failed to parse bulk update report dynamo item" + + bulk_upload_report_service.db_service.scan_table.side_effect = [ + mock_response, + ] + + actual = bulk_upload_report_service.get_dynamodb_report_items( + int(MOCK_START_REPORT_TIME.timestamp()), int(MOCK_END_REPORT_TIME.timestamp()) ) - bulk_upload_report_service.db_service.scan_table.side_effect = [MOCK_RESPONSE] + + assert actual == [MOCK_REPORT_ITEMS_ALL[1]] + assert expected_message in caplog.records[-1].msg + + +def test_get_dynamo_data_with_no_start_key(bulk_upload_report_service, mock_filter): + bulk_upload_report_service.db_service.scan_table.side_effect = [ + MOCK_REPORT_RESPONSE_ALL + ] actual = bulk_upload_report_service.get_dynamodb_report_items( - MOCK_START_REPORT_TIME, MOCK_END_REPORT_TIME + int(MOCK_START_REPORT_TIME.timestamp()), int(MOCK_END_REPORT_TIME.timestamp()) ) - assert actual == EXPECTED_RESPONSE + assert actual == MOCK_REPORT_ITEMS_ALL bulk_upload_report_service.db_service.scan_table.assert_called_once() bulk_upload_report_service.db_service.scan_table.assert_called_with( MOCK_BULK_REPORT_TABLE_NAME, filter_expression=mock_filter ) -def test_get_dynamo_data_with_no_items(bulk_upload_report_service): +def test_get_dynamo_data_with_no_items_returns_empty_list(bulk_upload_report_service): bulk_upload_report_service.db_service.scan_table.side_effect = [MOCK_EMPTY_RESPONSE] actual = bulk_upload_report_service.get_dynamodb_report_items( - MOCK_START_REPORT_TIME, MOCK_END_REPORT_TIME + int(MOCK_START_REPORT_TIME.timestamp()), int(MOCK_END_REPORT_TIME.timestamp()) ) assert actual == [] bulk_upload_report_service.db_service.scan_table.assert_called_once() -def test_get_dynamo_data_with_bad_response(bulk_upload_report_service): +def test_get_dynamo_data_with_bad_response_returns_empty_list( + bulk_upload_report_service, +): bulk_upload_report_service.db_service.scan_table.side_effect = [UNEXPECTED_RESPONSE] actual = bulk_upload_report_service.get_dynamodb_report_items( - MOCK_START_REPORT_TIME, MOCK_END_REPORT_TIME + int(MOCK_START_REPORT_TIME.timestamp()), int(MOCK_END_REPORT_TIME.timestamp()) ) - assert actual is None + assert actual == [] bulk_upload_report_service.db_service.scan_table.assert_called_once() @@ -228,7 +225,6 @@ def test_report_handler_no_items_returns_expected_log( mock_write_items_to_csv, mock_get_times_for_scan, ): - expected_message = "No data found, no new report file to upload" mock_get_db_report_items.return_value = [] bulk_upload_report_service.report_handler() @@ -254,12 +250,28 @@ def test_report_handler_with_items_uploads_summary_report_to_bucket( caplog, ): expected_messages = [ + "Bulk upload reports for 2012-01-13 07:00:00 to 2012-01-14 07:00:00.csv", + "Generating ODS report file for Y12345", + "Uploading ODS report file for Y12345 to S3", + "Generating ODS report file for Z12345", + "Uploading ODS report file for Z12345 to S3", "Successfully processed daily ODS reports", + "Uploading daily summary report file to S3", "Successfully processed daily summary report", + "Uploading daily report file to S3", "Successfully processed daily report", + "Uploading daily success report file to S3", + "Successfully processed success report", + "Uploading daily suspended report file to S3", + "Successfully processed suspended report", + "Uploading daily deceased report file to S3", + "Successfully processed deceased report", + "Uploading daily restricted report file to S3", + "Successfully processed restricted report", + "Uploading daily rejected report file to S3", + "Successfully processed rejected report", ] - mock_date_string = MOCK_END_REPORT_TIME.strftime("%Y%m%d") bulk_upload_report_service.report_handler() mock_get_times_for_scan.assert_called_once() @@ -273,22 +285,54 @@ def test_report_handler_with_items_uploads_summary_report_to_bucket( calls = [ call( s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, - file_key=f"daily_statistical_report_bulk_upload_summary_{mock_date_string}_uploaded_by_Y12345.csv", - file_name=f"/tmp/daily_statistical_report_bulk_upload_summary_{mock_date_string}_uploaded_by_Y12345.csv", + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}_uploaded_by_Y12345.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}_uploaded_by_Y12345.csv", + ), + call( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}_uploaded_by_Z12345.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}_uploaded_by_Z12345.csv", + ), + call( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_summary_{MOCK_TIMESTAMP}.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_summary_{MOCK_TIMESTAMP}.csv", + ), + call( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_entire_bulk_upload_{str(MOCK_START_REPORT_TIME)}_to_{str(MOCK_END_REPORT_TIME)}.csv", + file_name=f"/tmp/daily_statistical_report_entire_bulk_upload_{str(MOCK_START_REPORT_TIME)}_to_{str(MOCK_END_REPORT_TIME)}.csv", + ), + call( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_success_{MOCK_TIMESTAMP}.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_success_{MOCK_TIMESTAMP}.csv", + ), + call( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_suspended_{MOCK_TIMESTAMP}.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_suspended_{MOCK_TIMESTAMP}.csv", + ), + call( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_deceased_{MOCK_TIMESTAMP}.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_deceased_{MOCK_TIMESTAMP}.csv", ), call( s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, - file_key=f"daily-reports/daily_statistical_report_bulk_upload_summary_{mock_date_string}.csv", - file_name=f"/tmp/daily_statistical_report_bulk_upload_summary_{mock_date_string}.csv", + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_restricted_{MOCK_TIMESTAMP}.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_restricted_{MOCK_TIMESTAMP}.csv", ), call( s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, - file_key=f"daily-reports/Bulk upload report for {str(MOCK_END_REPORT_TIME)}.csv", - file_name=f"/tmp/Bulk upload report for {str(MOCK_END_REPORT_TIME)}.csv", + file_key=f"bulk-upload-reports/2012-01-13/daily_statistical_report_bulk_upload_rejected_{MOCK_TIMESTAMP}.csv", + file_name=f"/tmp/daily_statistical_report_bulk_upload_rejected_{MOCK_TIMESTAMP}.csv", ), ] - bulk_upload_report_service.s3_service.upload_file.has_calls(calls) + bulk_upload_report_service.s3_service.upload_file.assert_has_calls( + calls, any_order=False + ) log_message_match = set(expected_messages).issubset(caplog.messages) @@ -296,36 +340,42 @@ def test_report_handler_with_items_uploads_summary_report_to_bucket( def test_generate_individual_ods_report_creates_ods_report( - bulk_upload_report_service, mock_write_summary_data_to_csv + bulk_upload_report_service, mock_write_summary_data_to_csv, mock_get_times_for_scan ): - mock_ods_report_data = [MOCK_DATA_COMPLETE_UPLOAD, MOCK_DATA_FAILED_UPLOAD] expected = OdsReport( - TEST_CURRENT_GP_ODS, - 1, - 0, - 0, - {"File name not matching Lloyd George naming convention": 1}, + MOCK_TIMESTAMP, + TEST_UPLOADER_ODS_1, + MOCK_REPORT_ITEMS_UPLOADER_1, ) + actual = bulk_upload_report_service.generate_individual_ods_report( - TEST_CURRENT_GP_ODS, mock_ods_report_data, MOCK_END_REPORT_TIME + TEST_UPLOADER_ODS_1, MOCK_REPORT_ITEMS_UPLOADER_1 ) assert actual.__dict__ == expected.__dict__ + + mock_write_summary_data_to_csv.assert_called_with( + file_name=f"daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}_uploaded_by_{TEST_CURRENT_GP_ODS}.csv", + total_successful=5, + total_registered_elsewhere=1, + total_suspended=1, + extra_rows=[ + ["FailureReason", "Could not find the given patient on PDS", 2], + ["FailureReason", "Lloyd George file already exists", 1], + ], + ) bulk_upload_report_service.s3_service.upload_file.assert_called() - mock_write_summary_data_to_csv.assert_called() -def test_generate_individual_ods_report_writes_csv_report(bulk_upload_report_service): - mock_ods_report_data = [MOCK_DATA_COMPLETE_UPLOAD, MOCK_DATA_FAILED_UPLOAD] - mock_date = 20120114 - mock_file_name = f"daily_statistical_report_bulk_upload_summary_{mock_date}_uploaded_by_{TEST_CURRENT_GP_ODS}.csv" +def test_generate_individual_ods_report_writes_csv_report( + bulk_upload_report_service, mock_get_times_for_scan +): + mock_file_name = f"daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}_uploaded_by_{TEST_CURRENT_GP_ODS}.csv" bulk_upload_report_service.generate_individual_ods_report( - TEST_CURRENT_GP_ODS, - mock_ods_report_data, - mock_date, + TEST_UPLOADER_ODS_1, MOCK_REPORT_ITEMS_UPLOADER_1 ) - expected = readfile("expected_bulk_upload_report.csv") + expected = readfile("expected_ods_report_for_uploader_1.csv") with open(f"/tmp/{mock_file_name}") as test_file: actual = test_file.read() assert expected == actual @@ -333,29 +383,49 @@ def test_generate_individual_ods_report_writes_csv_report(bulk_upload_report_ser bulk_upload_report_service.s3_service.upload_file.assert_called_with( s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, - file_key=f"{mock_file_name}", + file_key=f"bulk-upload-reports/2012-01-13/{mock_file_name}", file_name=f"/tmp/{mock_file_name}", ) -def test_generate_summary_report_with_two_ods_reports(bulk_upload_report_service): - mock_ods_report_data = [ - MOCK_DATA_COMPLETE_UPLOAD, - MOCK_DATA_FAILED_UPLOAD, - MOCK_BULK_REPORT_TABLE_RESPONSE[0], - MOCK_BULK_REPORT_TABLE_RESPONSE[2], - ] - mock_file_name = ( - f"daily_statistical_report_bulk_upload_summary_{MOCK_END_REPORT_TIME}.csv" +def test_generate_ods_reports_writes_multiple_ods_reports( + bulk_upload_report_service, mock_get_times_for_scan +): + mock_file_name_uploader_1 = ( + f"daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}" + f"_uploaded_by_{TEST_UPLOADER_ODS_1}.csv" + ) + mock_file_name_uploader_2 = ( + f"daily_statistical_report_bulk_upload_ods_summary_{MOCK_TIMESTAMP}" + f"_uploaded_by_{TEST_UPLOADER_ODS_2}.csv" ) - ods_reports = bulk_upload_report_service.generate_ods_reports( - mock_ods_report_data, MOCK_END_REPORT_TIME + bulk_upload_report_service.generate_ods_reports( + MOCK_REPORT_ITEMS_ALL, ) - assert len(ods_reports) == 2 - bulk_upload_report_service.generate_summary_report( - ods_reports, MOCK_END_REPORT_TIME + expected = readfile("expected_ods_report_for_uploader_1.csv") + with open(f"/tmp/{mock_file_name_uploader_1}") as test_file: + actual = test_file.read() + assert expected == actual + os.remove(f"/tmp/{mock_file_name_uploader_1}") + + expected = readfile("expected_ods_report_for_uploader_2.csv") + with open(f"/tmp/{mock_file_name_uploader_2}") as test_file: + actual = test_file.read() + assert expected == actual + os.remove(f"/tmp/{mock_file_name_uploader_2}") + + +def test_generate_summary_report_with_two_ods_reports( + bulk_upload_report_service, mock_get_times_for_scan +): + mock_file_name = ( + f"daily_statistical_report_bulk_upload_summary_{MOCK_TIMESTAMP}.csv" ) + + ods_reports = bulk_upload_report_service.generate_ods_reports(MOCK_REPORT_ITEMS_ALL) + assert len(ods_reports) == 2 + bulk_upload_report_service.generate_summary_report(ods_reports) expected = readfile("expected_bulk_upload_summary_report.csv") with open(f"/tmp/{mock_file_name}") as test_file: actual = test_file.read() @@ -363,26 +433,131 @@ def test_generate_summary_report_with_two_ods_reports(bulk_upload_report_service os.remove(f"/tmp/{mock_file_name}") -def test_reports_count_individual_patients_success_and_failures( - bulk_upload_report_service, +def test_generate_success_report_writes_csv( + bulk_upload_report_service, mock_get_times_for_scan ): mock_file_name = ( - f"daily_statistical_report_bulk_upload_summary_{MOCK_END_REPORT_TIME}.csv" + f"daily_statistical_report_bulk_upload_success_{MOCK_TIMESTAMP}.csv" ) - mock_ods_report_data = [ - MOCK_DATA_COMPLETE_UPLOAD, - MOCK_DATA_FAILED_UPLOAD, - ] + MOCK_BULK_REPORT_TABLE_RESPONSE - ods_reports = bulk_upload_report_service.generate_ods_reports( - mock_ods_report_data, MOCK_END_REPORT_TIME + test_ods_reports = bulk_upload_report_service.generate_ods_reports( + MOCK_REPORT_ITEMS_ALL, ) - bulk_upload_report_service.generate_summary_report( - ods_reports, MOCK_END_REPORT_TIME + bulk_upload_report_service.generate_success_report(test_ods_reports) + + expected = readfile("expected_success_report.csv") + with open(f"/tmp/{mock_file_name}") as test_file: + actual = test_file.read() + assert expected == actual + os.remove(f"/tmp/{mock_file_name}") + + bulk_upload_report_service.s3_service.upload_file.assert_called_with( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/{mock_file_name}", + file_name=f"/tmp/{mock_file_name}", ) - expected = readfile("expected_bulk_upload_summary_report.csv") + + +def test_generate_suspended_report_writes_csv( + bulk_upload_report_service, mock_get_times_for_scan +): + mock_file_name = ( + f"daily_statistical_report_bulk_upload_suspended_{MOCK_TIMESTAMP}.csv" + ) + + test_ods_reports = bulk_upload_report_service.generate_ods_reports( + MOCK_REPORT_ITEMS_ALL, + ) + + bulk_upload_report_service.generate_suspended_report(test_ods_reports) + + expected = readfile("expected_suspended_report.csv") + with open(f"/tmp/{mock_file_name}") as test_file: + actual = test_file.read() + assert expected == actual + os.remove(f"/tmp/{mock_file_name}") + + bulk_upload_report_service.s3_service.upload_file.assert_called_with( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/{mock_file_name}", + file_name=f"/tmp/{mock_file_name}", + ) + + +def test_generate_deceased_report_writes_csv( + bulk_upload_report_service, mock_get_times_for_scan +): + mock_file_name = ( + f"daily_statistical_report_bulk_upload_deceased_{MOCK_TIMESTAMP}.csv" + ) + + test_ods_reports = bulk_upload_report_service.generate_ods_reports( + MOCK_REPORT_ITEMS_ALL, + ) + + bulk_upload_report_service.generate_deceased_report(test_ods_reports) + + expected = readfile("expected_deceased_report.csv") with open(f"/tmp/{mock_file_name}") as test_file: actual = test_file.read() assert expected == actual os.remove(f"/tmp/{mock_file_name}") + + bulk_upload_report_service.s3_service.upload_file.assert_called_with( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/{mock_file_name}", + file_name=f"/tmp/{mock_file_name}", + ) + + +def test_generate_restricted_report_writes_csv( + bulk_upload_report_service, mock_get_times_for_scan +): + mock_file_name = ( + f"daily_statistical_report_bulk_upload_restricted_{MOCK_TIMESTAMP}.csv" + ) + + test_ods_reports = bulk_upload_report_service.generate_ods_reports( + MOCK_REPORT_ITEMS_ALL, + ) + + bulk_upload_report_service.generate_restricted_report(test_ods_reports) + + expected = readfile("expected_restricted_report.csv") + with open(f"/tmp/{mock_file_name}") as test_file: + actual = test_file.read() + assert expected == actual + os.remove(f"/tmp/{mock_file_name}") + + bulk_upload_report_service.s3_service.upload_file.assert_called_with( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/{mock_file_name}", + file_name=f"/tmp/{mock_file_name}", + ) + + +def test_generate_rejected_report_writes_csv( + bulk_upload_report_service, mock_get_times_for_scan +): + mock_file_name = ( + f"daily_statistical_report_bulk_upload_rejected_{MOCK_TIMESTAMP}.csv" + ) + + test_ods_reports = bulk_upload_report_service.generate_ods_reports( + MOCK_REPORT_ITEMS_ALL, + ) + + bulk_upload_report_service.generate_rejected_report(test_ods_reports) + + expected = readfile("expected_rejected_report.csv") + with open(f"/tmp/{mock_file_name}") as test_file: + actual = test_file.read() + assert expected == actual + os.remove(f"/tmp/{mock_file_name}") + + bulk_upload_report_service.s3_service.upload_file.assert_called_with( + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"bulk-upload-reports/2012-01-13/{mock_file_name}", + file_name=f"/tmp/{mock_file_name}", + ) diff --git a/lambdas/tests/unit/services/test_statistical_report_service.py b/lambdas/tests/unit/services/test_statistical_report_service.py index 492c38c5d..70e8ad11a 100644 --- a/lambdas/tests/unit/services/test_statistical_report_service.py +++ b/lambdas/tests/unit/services/test_statistical_report_service.py @@ -365,13 +365,15 @@ def test_join_dataframes_by_ods_code_can_handle_empty_dataframe(mock_service): @freeze_time("20240512T07:00:00Z") def test_store_report_to_s3(set_env, mock_s3_service, mock_temp_folder): mock_weekly_summary = EXPECTED_WEEKLY_SUMMARY + expected_date_folder = "2024-05-11" expected_filename = "statistical_report_20240505-20240511.csv" - expected_local_file_path = f"{mock_temp_folder}/{expected_filename}" service = StatisticalReportService() service.store_report_to_s3(mock_weekly_summary) mock_s3_service.upload_file.assert_called_with( - expected_local_file_path, MOCK_STATISTICS_REPORT_BUCKET_NAME, expected_filename + s3_bucket_name=MOCK_STATISTICS_REPORT_BUCKET_NAME, + file_key=f"statistic-reports/{expected_date_folder}/{expected_filename}", + file_name=f"{mock_temp_folder}/{expected_filename}", ) diff --git a/lambdas/utils/utilities.py b/lambdas/utils/utilities.py index 7c3493d59..271a24bf0 100755 --- a/lambdas/utils/utilities.py +++ b/lambdas/utils/utilities.py @@ -2,6 +2,7 @@ import os import re import uuid +from datetime import datetime from urllib.parse import urlparse from inflection import camelize @@ -59,3 +60,8 @@ def get_file_key_from_s3_url(s3_url: str) -> str: def flatten(nested_list: list[list]) -> list: return list(itertools.chain(*nested_list)) + + +def generate_date_folder_name(date: str) -> str: + date_obj = datetime.strptime(date, "%Y%m%d") + return date_obj.strftime("%Y-%m-%d")