From 8b5cc60301eb9e4b8fdc04debd52fa6f3099bb25 Mon Sep 17 00:00:00 2001 From: Joe Fong Date: Mon, 10 Jun 2024 10:26:28 +0100 Subject: [PATCH] [PRMDR-919] Add error handling for the case if no data exist for the whole week --- .../services/statistical_report_service.py | 57 +++++++++++-------- .../test_statistical_report_service.py | 16 +++++- lambdas/utils/exceptions.py | 4 ++ 3 files changed, 51 insertions(+), 26 deletions(-) diff --git a/lambdas/services/statistical_report_service.py b/lambdas/services/statistical_report_service.py index 225d3bb1b..bf9b506f2 100644 --- a/lambdas/services/statistical_report_service.py +++ b/lambdas/services/statistical_report_service.py @@ -18,6 +18,7 @@ from services.base.dynamo_service import DynamoDBService from services.base.s3_service import S3Service from utils.audit_logging_setup import LoggingService +from utils.exceptions import StatisticDataNotFoundException logger = LoggingService(__name__) @@ -33,12 +34,10 @@ def __init__(self): last_seven_days = [ datetime.today() - timedelta(days=i) for i in range(7, 0, -1) ] - self.report_period: list[str] = [ + self.dates_to_collect: list[str] = [ date.strftime("%Y%m%d") for date in last_seven_days ] - self.date_period_in_output_filename = ( - f"{self.report_period[0]}-{self.report_period[-1]}" - ) + self.report_period = f"{self.dates_to_collect[0]}-{self.dates_to_collect[-1]}" def make_weekly_summary_and_output_to_bucket(self) -> None: weekly_summary = self.make_weekly_summary() @@ -53,29 +52,40 @@ def make_weekly_summary(self) -> pl.DataFrame: weekly_organisation_data = self.summarise_organisation_data(organisation_data) weekly_application_data = self.summarise_application_data(application_data) - combined_data = self.join_dataframes_by_ods_code( - [ - weekly_record_store_data, - weekly_organisation_data, - weekly_application_data, - ] - ) + all_summarised_data = [ + weekly_record_store_data, + weekly_organisation_data, + weekly_application_data, + ] + + combined_data = self.join_dataframes_by_ods_code(all_summarised_data) weekly_summary = self.tidy_up_data(combined_data) return weekly_summary def get_statistic_data(self) -> LoadedStatisticData: logger.info("Loading statistic data of previous week from dynamodb...") - logger.info(f"The period to report: {self.report_period}") + logger.info(f"The period to report: {self.dates_to_collect}") dynamodb_items = [] - for date in self.report_period: + for date in self.dates_to_collect: response = self.dynamo_service.query_all_fields( table_name=self.statistic_table, key_condition_expression=Key("Date").eq(date), ) dynamodb_items.extend(response["Items"]) - return load_from_dynamodb_items(dynamodb_items) + loaded_data = load_from_dynamodb_items(dynamodb_items) + + all_data_empty = all(not data for data in loaded_data) + if all_data_empty: + logger.error( + f"No statistic data can be found during the period {self.report_period}. " + "Please check whether the data collection lambda worked properly.", + {"Result": "Statistic data not available."}, + ) + raise StatisticDataNotFoundException() + + return loaded_data @staticmethod def load_data_to_polars(data: list[StatisticData]) -> pl.DataFrame: @@ -153,16 +163,17 @@ def summarise_application_data( return summarised_data def join_dataframes_by_ods_code( - self, summarised_data: list[pl.DataFrame] + self, all_summarised_data: list[pl.DataFrame] ) -> pl.DataFrame: - non_empty_data = [df for df in summarised_data if not df.is_empty()] - joined_data = non_empty_data[0] - for other_df in non_empty_data[1:]: - joined_data = joined_data.join( - other_df, on="ods_code", how="outer_coalesce" + data_to_report = [df for df in all_summarised_data if not df.is_empty()] + joined_dataframe = data_to_report[0] + + for other_dataframe in data_to_report[1:]: + joined_dataframe = joined_dataframe.join( + other_dataframe, on="ods_code", how="outer_coalesce" ) - return joined_data + return joined_dataframe def tidy_up_data(self, joined_data: pl.DataFrame) -> pl.DataFrame: with_date_column_updated = self.update_date_column(joined_data) @@ -175,7 +186,7 @@ def tidy_up_data(self, joined_data: pl.DataFrame) -> pl.DataFrame: def update_date_column(self, joined_data: pl.DataFrame) -> pl.DataFrame: date_column_filled_with_report_period = joined_data.with_columns( - pl.lit(self.date_period_in_output_filename).alias("date") + pl.lit(self.report_period).alias("date") ) return date_column_filled_with_report_period @@ -197,7 +208,7 @@ def rename_snakecase_columns(column_name: str) -> str: def store_report_to_s3(self, weekly_summary: pl.DataFrame) -> None: logger.info("Saving the weekly report as .csv") - file_name = f"statistical_report_{self.date_period_in_output_filename}.csv" + file_name = f"statistical_report_{self.report_period}.csv" temp_folder = tempfile.mkdtemp() local_file_path = os.path.join(temp_folder, file_name) try: diff --git a/lambdas/tests/unit/services/test_statistical_report_service.py b/lambdas/tests/unit/services/test_statistical_report_service.py index 893263024..f6f5f4137 100644 --- a/lambdas/tests/unit/services/test_statistical_report_service.py +++ b/lambdas/tests/unit/services/test_statistical_report_service.py @@ -34,6 +34,7 @@ MOCK_RECORD_STORE_DATA_2, MOCK_RECORD_STORE_DATA_3, ) +from utils.exceptions import StatisticDataNotFoundException @pytest.fixture @@ -72,7 +73,7 @@ def mock_temp_folder(mocker): def test_datetime_correctly_configured_during_initialise(set_env): service = StatisticalReportService() - assert service.report_period == [ + assert service.dates_to_collect == [ "20240530", "20240531", "20240601", @@ -81,7 +82,7 @@ def test_datetime_correctly_configured_during_initialise(set_env): "20240604", "20240605", ] - assert service.date_period_in_output_filename == "20240530-20240605" + assert service.report_period == "20240530-20240605" @freeze_time("20240512T07:00:00Z") @@ -97,7 +98,7 @@ def test_make_weekly_summary(set_env, mocker): def test_get_statistic_data(mock_dynamodb_service, mock_service): - mock_service.report_period = ["20240510", "20240511"] + mock_service.dates_to_collect = ["20240510", "20240511"] mock_dynamodb_service.query_all_fields.side_effect = MOCK_DYNAMODB_QUERY_RESPONSE actual = mock_service.get_statistic_data() @@ -119,6 +120,15 @@ def test_get_statistic_data(mock_dynamodb_service, mock_service): mock_dynamodb_service.query_all_fields.assert_has_calls(expected_calls) +def test_get_statistic_data_raise_error_if_all_data_are_empty( + mock_dynamodb_service, mock_service +): + mock_dynamodb_service.query_all_fields.return_value = {"Items": []} + + with pytest.raises(StatisticDataNotFoundException): + mock_service.get_statistic_data() + + def test_summarise_record_store_data(mock_service): actual = mock_service.summarise_record_store_data( [MOCK_RECORD_STORE_DATA_1, MOCK_RECORD_STORE_DATA_2, MOCK_RECORD_STORE_DATA_3] diff --git a/lambdas/utils/exceptions.py b/lambdas/utils/exceptions.py index 35f6ac6ad..098ae2891 100644 --- a/lambdas/utils/exceptions.py +++ b/lambdas/utils/exceptions.py @@ -118,3 +118,7 @@ class FileUploadInProgress(Exception): class LogsQueryException(Exception): pass + + +class StatisticDataNotFoundException(Exception): + pass