PRMP-1342: Create lambda to handle deleted document references (#487)
* update tests and lambda decorator

* fix tests path

* add detail to log message

* [PRMP-1342] - Fix import error

---------

Co-authored-by: MohammadIqbalAD-NHS <127403145+MohammadIqbalAD-NHS@users.noreply.github.com>
Co-authored-by: Mohammad Iqbal <mohammad.iqbal27@nhs.net>
3 people authored Jan 8, 2025
1 parent a70516c commit ff4250c
Showing 30 changed files with 585 additions and 101 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/base-lambdas-reusable-deploy-all.yml
@@ -99,6 +99,20 @@ jobs:
    secrets:
      AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}

  deploy_delete_document_object_handler:
    name: Deploy delete_document_object_handler
    uses: ./.github/workflows/base-lambdas-reusable-deploy.yml
    with:
      environment: ${{ inputs.environment }}
      python_version: ${{ inputs.python_version }}
      build_branch: ${{ inputs.build_branch }}
      sandbox: ${{ inputs.sandbox }}
      lambda_handler_name: delete_document_object_handler
      lambda_aws_name: DeleteDocumentObjectS3
      lambda_layer_names: 'core_lambda_layer'
    secrets:
      AWS_ASSUME_ROLE: ${{ secrets.AWS_ASSUME_ROLE }}

  deploy_document_manifest_job_lambda:
    name: Deploy document_manifest_job_lambda
    uses: ./.github/workflows/base-lambdas-reusable-deploy.yml
6 changes: 6 additions & 0 deletions lambdas/enums/document_retention.py
@@ -0,0 +1,6 @@
from enum import IntEnum


class DocumentRetentionDays(IntEnum):
    SOFT_DELETE = 56
    DEATH = 3650
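
Since DocumentRetentionDays is an IntEnum, its members behave as plain integers in arithmetic. A minimal sketch of how a retention period becomes a DynamoDB TTL timestamp, mirroring the calculation delete_document_references performs later in this diff:

from datetime import datetime, timezone
from enum import IntEnum


class DocumentRetentionDays(IntEnum):
    SOFT_DELETE = 56
    DEATH = 3650


# 56 days expressed in seconds; IntEnum members multiply like plain ints.
ttl_seconds = DocumentRetentionDays.SOFT_DELETE * 24 * 60 * 60
# Epoch timestamp after which DynamoDB may expire the soft-deleted item.
document_reference_ttl = int(datetime.now(timezone.utc).timestamp() + ttl_seconds)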
12 changes: 12 additions & 0 deletions lambdas/enums/lambda_error.py
@@ -232,6 +232,14 @@ def to_str(self) -> str:
"""
Errors for DocumentDeletionServiceException
"""
DocDelInvalidStreamEvent = {
"err_code": "DDS_4001",
"message": "Failed to delete document object",
}
DocDelObjectFailure = {
"err_code": "DDS_4002",
"message": "Failed to delete document object",
}
DocDelClient = {
"err_code": "DDS_5001",
"message": "Failed to delete documents",
@@ -470,6 +478,10 @@ def to_str(self) -> str:
"err_code": "LGL_400",
"message": "Incomplete record, Failed to create document manifest",
}
DynamoInvalidStreamEvent = {
"err_code": "DBS_4001",
"message": "Failed to parse DynamoDb event stream",
}

    MockError = {
        "message": "Client error",
12 changes: 0 additions & 12 deletions lambdas/enums/s3_lifecycle_tags.py

This file was deleted.

59 changes: 59 additions & 0 deletions lambdas/handlers/delete_document_object_handler.py
@@ -0,0 +1,59 @@
from enums.lambda_error import LambdaError
from enums.logging_app_interaction import LoggingAppInteraction
from models.document_reference import DocumentReference
from pydantic import ValidationError
from services.document_deletion_service import DocumentDeletionService
from utils.audit_logging_setup import LoggingService
from utils.decorators.handle_lambda_exceptions import handle_lambda_exceptions
from utils.decorators.override_error_check import override_error_check
from utils.decorators.set_audit_arg import set_request_context_for_logging
from utils.decorators.validate_dynamo_stream_event import validate_dynamo_stream
from utils.dynamo_utils import parse_dynamo_record
from utils.lambda_exceptions import DocumentDeletionServiceException
from utils.lambda_response import ApiGatewayResponse
from utils.request_context import request_context

logger = LoggingService(__name__)


@set_request_context_for_logging
@override_error_check
@handle_lambda_exceptions
@validate_dynamo_stream
def lambda_handler(event, context):
    request_context.app_interaction = LoggingAppInteraction.DELETE_RECORD.value

    logger.info(
        "Delete Document Object handler has been triggered by DynamoDb REMOVE event"
    )
    try:
        event_record = event["Records"][0]

        event_type = event_record.get("eventName")
        deleted_dynamo_reference = event_record.get("dynamodb").get("OldImage", {})

        if event_type != "REMOVE" or not deleted_dynamo_reference:
            logger.error(
                "Failed to extract deleted record from DynamoDb stream",
                {"Results": "Failed to delete document"},
            )
            raise DocumentDeletionServiceException(
                400, LambdaError.DynamoInvalidStreamEvent
            )
        parsed_dynamo_record = parse_dynamo_record(deleted_dynamo_reference)
        document = DocumentReference.model_validate(parsed_dynamo_record)

        deletion_service = DocumentDeletionService()
        deletion_service.handle_object_delete(deleted_reference=document)
    except (ValueError, ValidationError) as e:
        logger.error(
            f"Failed to parse Document Reference from deleted record: {str(e)}",
            {"Results": "Failed to delete document"},
        )
        raise DocumentDeletionServiceException(
            400, LambdaError.DynamoInvalidStreamEvent
        )

    return ApiGatewayResponse(
        200, "Successfully deleted Document Reference object", "GET"
    ).create_api_gateway_response()
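
For reference, a sketch of the minimal DynamoDB stream payload this handler accepts. The OldImage attribute names and values are illustrative assumptions, not the real DocumentReference schema:

sample_remove_event = {
    "Records": [
        {
            "eventName": "REMOVE",
            "dynamodb": {
                # DynamoDB attribute-value format; field names are hypothetical.
                "OldImage": {
                    "ID": {"S": "3d8683b9-1665-40d2-8499-6e8302d75145"},
                    "FileLocation": {"S": "s3://example-bucket/9000000009/doc.pdf"},
                }
            },
        }
    ]
}

# parse_dynamo_record would flatten OldImage into a plain dict, which
# DocumentReference.model_validate turns into a model instance before
# DocumentDeletionService.handle_object_delete deletes the S3 object.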
2 changes: 1 addition & 1 deletion lambdas/handlers/delete_document_reference_handler.py
@@ -43,7 +43,7 @@ def lambda_handler(event, context):

    deletion_service = DocumentDeletionService()

-    files_deleted = deletion_service.handle_delete(nhs_number, document_types)
+    files_deleted = deletion_service.handle_reference_delete(nhs_number, document_types)
    if files_deleted:
        logger.info(
            "Documents were deleted successfully", {"Result": "Successful deletion"}
39 changes: 34 additions & 5 deletions lambdas/services/document_deletion_service.py
@@ -1,11 +1,12 @@
import os
import uuid
from typing import Literal
from urllib.parse import urlparse

from botocore.exceptions import ClientError
from enums.document_retention import DocumentRetentionDays
from enums.lambda_error import LambdaError
from enums.nrl_sqs_upload import NrlActionTypes
-from enums.s3_lifecycle_tags import S3LifecycleTags
from enums.snomed_codes import SnomedCodes
from enums.supported_document_types import SupportedDocumentTypes
from models.document_reference import DocumentReference
@@ -15,7 +16,7 @@
from services.lloyd_george_stitch_job_service import LloydGeorgeStitchJobService
from utils.audit_logging_setup import LoggingService
from utils.common_query_filters import NotDeleted
-from utils.exceptions import DynamoServiceException
+from utils.exceptions import DocumentServiceException, DynamoServiceException
from utils.lambda_exceptions import DocumentDeletionServiceException

logger = LoggingService(__name__)
@@ -27,7 +28,7 @@ def __init__(self):
        self.stitch_service = LloydGeorgeStitchJobService()
        self.sqs_service = SQSService()

-    def handle_delete(
+    def handle_reference_delete(
        self, nhs_number: str, doc_types: list[SupportedDocumentTypes]
    ) -> list[DocumentReference]:
        files_deleted = []
@@ -38,6 +39,34 @@ def handle_delete(
        self.send_sqs_message_to_remove_pointer(nhs_number)
        return files_deleted

    def handle_object_delete(self, deleted_reference: DocumentReference):
        try:
            s3_uri = deleted_reference.file_location

            parsed_uri = urlparse(s3_uri)
            bucket_name = parsed_uri.netloc
            object_key = parsed_uri.path.lstrip("/")

            if not bucket_name or not object_key:
                raise DocumentDeletionServiceException(
                    400, LambdaError.DocDelObjectFailure
                )

            self.document_service.delete_document_object(
                bucket=bucket_name, key=object_key
            )

            logger.info(
                "Successfully deleted Document Reference S3 Object",
                {"Result": "Successful deletion"},
            )
        except DocumentServiceException as e:
            logger.error(
                str(e),
                {"Results": "Failed to delete document"},
            )
            raise DocumentDeletionServiceException(400, LambdaError.DocDelObjectFailure)
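
The bucket/key split above leans on the standard library treating the s3:// scheme like any other URL. A quick illustration with a made-up URI:

from urllib.parse import urlparse

parsed_uri = urlparse("s3://example-lg-bucket/9000000009/report.pdf")
bucket_name = parsed_uri.netloc           # "example-lg-bucket"
object_key = parsed_uri.path.lstrip("/")  # "9000000009/report.pdf"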

    def get_documents_references_in_storage(
        self,
        nhs_number: str,
@@ -69,10 +98,10 @@ def delete_specific_doc_type(
        try:
            results = self.get_documents_references_in_storage(nhs_number, doc_type)
            if results:
-                self.document_service.delete_documents(
+                self.document_service.delete_document_references(
                    table_name=doc_type.get_dynamodb_table_name(),
                    document_references=results,
-                    type_of_delete=str(S3LifecycleTags.SOFT_DELETE.value),
+                    document_ttl_days=DocumentRetentionDays.SOFT_DELETE,
                )

                logger.info(
47 changes: 28 additions & 19 deletions lambdas/services/document_service.py
@@ -2,14 +2,17 @@

from boto3.dynamodb.conditions import Attr, ConditionBase
from enums.metadata_field_names import DocumentReferenceMetadataFields
-from enums.s3_lifecycle_tags import S3LifecycleDays, S3LifecycleTags
from enums.supported_document_types import SupportedDocumentTypes
from models.document_reference import DocumentReference
from services.base.dynamo_service import DynamoDBService
from services.base.s3_service import S3Service
from utils.audit_logging_setup import LoggingService
from utils.dynamo_utils import filter_uploaded_docs_and_recently_uploading_docs
-from utils.exceptions import FileUploadInProgress, NoAvailableDocument
+from utils.exceptions import (
+    DocumentServiceException,
+    FileUploadInProgress,
+    NoAvailableDocument,
+)

logger = LoggingService(__name__)

@@ -67,22 +70,15 @@ def fetch_documents_from_table_with_filter(
            documents.append(document)
        return documents

-    def delete_documents(
+    def delete_document_references(
        self,
        table_name: str,
        document_references: list[DocumentReference],
-        type_of_delete: str,
+        document_ttl_days: int,
    ):
        deletion_date = datetime.now(timezone.utc)

-        if type_of_delete == S3LifecycleTags.DEATH_DELETE.value:
-            ttl_days = S3LifecycleDays.DEATH_DELETE
-            tag_key = str(S3LifecycleTags.DEATH_DELETE.value)
-        else:
-            ttl_days = S3LifecycleDays.SOFT_DELETE
-            tag_key = str(S3LifecycleTags.SOFT_DELETE.value)
-
-        ttl_seconds = ttl_days * 24 * 60 * 60
+        ttl_seconds = document_ttl_days * 24 * 60 * 60
        document_reference_ttl = int(deletion_date.timestamp() + ttl_seconds)

        update_fields = {
@@ -95,17 +91,30 @@
logger.info(f"Deleting items in table: {table_name}")

for reference in document_references:
self.s3_service.create_object_tag(
file_key=reference.get_file_key(),
s3_bucket_name=reference.get_file_bucket(),
tag_key=tag_key,
tag_value=str(S3LifecycleTags.ENABLE_TAG.value),
)

self.dynamo_service.update_item(
table_name, reference.id, updated_fields=update_fields
)

    def delete_document_object(self, bucket: str, key: str):
        file_exists = self.s3_service.file_exist_on_s3(
            s3_bucket_name=bucket, file_key=key
        )

        if not file_exists:
            raise DocumentServiceException("Document does not exist in S3")

        logger.info(
            f"Located file `{key}` in `{bucket}`, attempting S3 object deletion"
        )
        self.s3_service.delete_object(s3_bucket_name=bucket, file_key=key)

        file_exists = self.s3_service.file_exist_on_s3(
            s3_bucket_name=bucket, file_key=key
        )

        if file_exists:
            raise DocumentServiceException("Document located in S3 after deletion")

    def update_documents(
        self,
        table_name: str,
6 changes: 6 additions & 0 deletions lambdas/tests/unit/conftest.py
@@ -6,6 +6,7 @@
from unittest import mock

import pytest
from botocore.exceptions import ClientError
from models.document_reference import DocumentReference
from models.pds_models import Patient, PatientDetails
from pydantic import ValidationError
@@ -292,6 +293,11 @@ class MockError(Enum):
    }


MOCK_CLIENT_ERROR = ClientError(
{"Error": {"Code": 500, "Message": "Test error message"}}, "Query"
)
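
A hypothetical sketch of how a unit test might reuse MOCK_CLIENT_ERROR to drive the S3 failure path of delete_document_object; the patch target, the no-argument DocumentService constructor, and the propagation of the raw ClientError are assumptions, not part of this commit:

import pytest
from botocore.exceptions import ClientError
from services.document_service import DocumentService


def test_delete_document_object_surfaces_client_error(mocker):
    # Patch the S3Service class used inside document_service with a mock.
    mock_s3 = mocker.patch("services.document_service.S3Service")
    mock_s3.return_value.file_exist_on_s3.return_value = True
    mock_s3.return_value.delete_object.side_effect = MOCK_CLIENT_ERROR

    # delete_document_object has no except clause, so the error propagates.
    with pytest.raises(ClientError):
        DocumentService().delete_document_object(bucket="test-bucket", key="test-key")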


@pytest.fixture
def mock_temp_folder(mocker):
    temp_folder = tempfile.mkdtemp()