[PRMDR-919] Add error handling for the case if no data exist for the whole week
joefong-nhs committed Jun 10, 2024
1 parent 354b9f3 commit 8b5cc60
Showing 3 changed files with 51 additions and 26 deletions.
57 changes: 34 additions & 23 deletions lambdas/services/statistical_report_service.py
@@ -18,6 +18,7 @@
 from services.base.dynamo_service import DynamoDBService
 from services.base.s3_service import S3Service
 from utils.audit_logging_setup import LoggingService
+from utils.exceptions import StatisticDataNotFoundException

 logger = LoggingService(__name__)

@@ -33,12 +34,10 @@ def __init__(self):
         last_seven_days = [
             datetime.today() - timedelta(days=i) for i in range(7, 0, -1)
         ]
-        self.report_period: list[str] = [
+        self.dates_to_collect: list[str] = [
             date.strftime("%Y%m%d") for date in last_seven_days
         ]
-        self.date_period_in_output_filename = (
-            f"{self.report_period[0]}-{self.report_period[-1]}"
-        )
+        self.report_period = f"{self.dates_to_collect[0]}-{self.dates_to_collect[-1]}"

     def make_weekly_summary_and_output_to_bucket(self) -> None:
         weekly_summary = self.make_weekly_summary()
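A minimal standalone sketch (not part of the commit) of what the renamed attributes end up holding, assuming a fixed "today" of 2024-06-06 so the values line up with the test expectations further down:

from datetime import datetime, timedelta

# Mirror the __init__ logic in the hunk above for an assumed run date.
today = datetime(2024, 6, 6)
last_seven_days = [today - timedelta(days=i) for i in range(7, 0, -1)]
dates_to_collect = [date.strftime("%Y%m%d") for date in last_seven_days]
report_period = f"{dates_to_collect[0]}-{dates_to_collect[-1]}"

print(dates_to_collect)  # ['20240530', '20240531', ..., '20240605']
print(report_period)     # '20240530-20240605'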
@@ -53,29 +52,40 @@ def make_weekly_summary(self) -> pl.DataFrame:
         weekly_organisation_data = self.summarise_organisation_data(organisation_data)
         weekly_application_data = self.summarise_application_data(application_data)

-        combined_data = self.join_dataframes_by_ods_code(
-            [
-                weekly_record_store_data,
-                weekly_organisation_data,
-                weekly_application_data,
-            ]
-        )
+        all_summarised_data = [
+            weekly_record_store_data,
+            weekly_organisation_data,
+            weekly_application_data,
+        ]
+
+        combined_data = self.join_dataframes_by_ods_code(all_summarised_data)
         weekly_summary = self.tidy_up_data(combined_data)

         return weekly_summary

     def get_statistic_data(self) -> LoadedStatisticData:
         logger.info("Loading statistic data of previous week from dynamodb...")
-        logger.info(f"The period to report: {self.report_period}")
+        logger.info(f"The period to report: {self.dates_to_collect}")
         dynamodb_items = []
-        for date in self.report_period:
+        for date in self.dates_to_collect:
             response = self.dynamo_service.query_all_fields(
                 table_name=self.statistic_table,
                 key_condition_expression=Key("Date").eq(date),
             )
             dynamodb_items.extend(response["Items"])

-        return load_from_dynamodb_items(dynamodb_items)
+        loaded_data = load_from_dynamodb_items(dynamodb_items)
+
+        all_data_empty = all(not data for data in loaded_data)
+        if all_data_empty:
+            logger.error(
+                f"No statistic data can be found during the period {self.report_period}. "
+                "Please check whether the data collection lambda worked properly.",
+                {"Result": "Statistic data not available."},
+            )
+            raise StatisticDataNotFoundException()
+
+        return loaded_data

     @staticmethod
     def load_data_to_polars(data: list[StatisticData]) -> pl.DataFrame:
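As a hedged illustration of the new guard, the snippet below recreates the emptiness check in isolation; the exact shape of LoadedStatisticData is not shown in this diff, so a tuple of three lists (record store, organisation and application data) is assumed here:

# Assumed shape: what load_from_dynamodb_items might return for a week
# in which the data collection lambda wrote nothing to DynamoDB.
loaded_data = ([], [], [])

all_data_empty = all(not data for data in loaded_data)
print(all_data_empty)  # True -> the service logs an error and raises
                       # StatisticDataNotFoundException instead of writing
                       # an empty weekly report to S3.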
@@ -153,16 +163,17 @@ def summarise_application_data(
         return summarised_data

     def join_dataframes_by_ods_code(
-        self, summarised_data: list[pl.DataFrame]
+        self, all_summarised_data: list[pl.DataFrame]
     ) -> pl.DataFrame:
-        non_empty_data = [df for df in summarised_data if not df.is_empty()]
-        joined_data = non_empty_data[0]
-        for other_df in non_empty_data[1:]:
-            joined_data = joined_data.join(
-                other_df, on="ods_code", how="outer_coalesce"
+        data_to_report = [df for df in all_summarised_data if not df.is_empty()]
+        joined_dataframe = data_to_report[0]
+
+        for other_dataframe in data_to_report[1:]:
+            joined_dataframe = joined_dataframe.join(
+                other_dataframe, on="ods_code", how="outer_coalesce"
             )

-        return joined_data
+        return joined_dataframe

     def tidy_up_data(self, joined_data: pl.DataFrame) -> pl.DataFrame:
         with_date_column_updated = self.update_date_column(joined_data)
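For readers unfamiliar with the polars call above, here is a small self-contained example of the same kind of join; the frames and column names are invented, and it assumes a polars version that still accepts how="outer_coalesce":

import polars as pl

record_store = pl.DataFrame({"ods_code": ["A100", "B200"], "total_files": [12, 5]})
applications = pl.DataFrame({"ods_code": ["B200", "C300"], "active_users": [3, 7]})

# Outer join that coalesces the duplicated join key: every ods_code from
# either side appears once, and metrics missing on one side become null.
joined = record_store.join(applications, on="ods_code", how="outer_coalesce")
print(joined.sort("ods_code"))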
@@ -175,7 +186,7 @@ def tidy_up_data(self, joined_data: pl.DataFrame) -> pl.DataFrame:

     def update_date_column(self, joined_data: pl.DataFrame) -> pl.DataFrame:
         date_column_filled_with_report_period = joined_data.with_columns(
-            pl.lit(self.date_period_in_output_filename).alias("date")
+            pl.lit(self.report_period).alias("date")
         )
         return date_column_filled_with_report_period

@@ -197,7 +208,7 @@ def rename_snakecase_columns(column_name: str) -> str:

     def store_report_to_s3(self, weekly_summary: pl.DataFrame) -> None:
         logger.info("Saving the weekly report as .csv")
-        file_name = f"statistical_report_{self.date_period_in_output_filename}.csv"
+        file_name = f"statistical_report_{self.report_period}.csv"
         temp_folder = tempfile.mkdtemp()
         local_file_path = os.path.join(temp_folder, file_name)
         try:
16 changes: 13 additions & 3 deletions lambdas/tests/unit/services/test_statistical_report_service.py
@@ -34,6 +34,7 @@
     MOCK_RECORD_STORE_DATA_2,
     MOCK_RECORD_STORE_DATA_3,
 )
+from utils.exceptions import StatisticDataNotFoundException


 @pytest.fixture
@@ -72,7 +73,7 @@ def mock_temp_folder(mocker):
 def test_datetime_correctly_configured_during_initialise(set_env):
     service = StatisticalReportService()

-    assert service.report_period == [
+    assert service.dates_to_collect == [
         "20240530",
         "20240531",
         "20240601",
@@ -81,7 +82,7 @@ def test_datetime_correctly_configured_during_initialise(set_env):
         "20240604",
         "20240605",
     ]
-    assert service.date_period_in_output_filename == "20240530-20240605"
+    assert service.report_period == "20240530-20240605"


 @freeze_time("20240512T07:00:00Z")
@@ -97,7 +98,7 @@ def test_make_weekly_summary(set_env, mocker):


 def test_get_statistic_data(mock_dynamodb_service, mock_service):
-    mock_service.report_period = ["20240510", "20240511"]
+    mock_service.dates_to_collect = ["20240510", "20240511"]
     mock_dynamodb_service.query_all_fields.side_effect = MOCK_DYNAMODB_QUERY_RESPONSE

     actual = mock_service.get_statistic_data()
@@ -119,6 +120,15 @@ def test_get_statistic_data(mock_dynamodb_service, mock_service):
     mock_dynamodb_service.query_all_fields.assert_has_calls(expected_calls)


+def test_get_statistic_data_raise_error_if_all_data_are_empty(
+    mock_dynamodb_service, mock_service
+):
+    mock_dynamodb_service.query_all_fields.return_value = {"Items": []}
+
+    with pytest.raises(StatisticDataNotFoundException):
+        mock_service.get_statistic_data()
+
+
 def test_summarise_record_store_data(mock_service):
     actual = mock_service.summarise_record_store_data(
         [MOCK_RECORD_STORE_DATA_1, MOCK_RECORD_STORE_DATA_2, MOCK_RECORD_STORE_DATA_3]
4 changes: 4 additions & 0 deletions lambdas/utils/exceptions.py
@@ -118,3 +118,7 @@ class FileUploadInProgress(Exception):

 class LogsQueryException(Exception):
     pass
+
+
+class StatisticDataNotFoundException(Exception):
+    pass
