Skip to content

Commit

Permalink
[PATCH] PRMDR-738 - Add DynamoDB Query Filter module (#325)
Browse files Browse the repository at this point in the history
* add Dynamo Filter module

* remove ttl from metadata fields list

* remove redundant field from metadata fields list

* change LastUpdated data type (str to int)

* refactor query builder

* fix tests

* add dynamo exceptions to query filter builder

* add exception tests
  • Loading branch information
abbas-khan10 authored Mar 21, 2024
1 parent 6c6c565 commit c668169
Show file tree
Hide file tree
Showing 16 changed files with 322 additions and 197 deletions.
15 changes: 15 additions & 0 deletions lambdas/enums/dynamo_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from enum import Enum


class AttributeOperator(Enum):
    """Comparison operators that can be applied to a single attribute.

    Each value is a short operator code ("eq", "ne", "gt", ...).
    NOTE(review): these codes look like the comparison method names on
    boto3's ``Attr`` (``Attr(...).eq``, ``.gte`` ...) - confirm against the
    filter builder before renaming any value.
    """

    EQUAL = "eq"
    NOT_EQUAL = "ne"
    GREATER_THAN = "gt"
    GREATER_OR_EQUAL = "gte"
    # Alias of GREATER_OR_EQUAL (same value, so Enum treats it as the same
    # member); gives a name that is parallel to LESS_THAN_OR_EQUAL without
    # breaking existing callers.
    GREATER_THAN_OR_EQUAL = "gte"
    LESS_THAN = "lt"
    LESS_THAN_OR_EQUAL = "lte"


class ConditionOperator(Enum):
    """Logical connectives for joining two filter conditions.

    The values are the symbols "|" (OR) and "&" (AND).
    NOTE(review): presumably these mirror the ``|`` / ``&`` operators that
    boto3 condition objects overload - confirm against the filter builder.
    """

    OR = "|"
    AND = "&"
5 changes: 3 additions & 2 deletions lambdas/enums/metadata_field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ class DocumentReferenceMetadataFields(Enum):
FILE_LOCATION = "FileLocation"
NHS_NUMBER = "NhsNumber"
TTL = "TTL"
TYPE = "Type"
VIRUS_SCANNER_RESULT = "VirusScannerResult"
CURRENT_GP_ODS = "CurrentGpOds"
UPLOADED = "Uploaded"
Expand All @@ -19,7 +18,9 @@ class DocumentReferenceMetadataFields(Enum):

@staticmethod
def list() -> list[str]:
return [str(field.value) for field in DocumentReferenceMetadataFields]
fields = [str(field.value) for field in DocumentReferenceMetadataFields]
fields.remove(DocumentReferenceMetadataFields.TTL.value)
return fields


class DocumentZipTraceFields(Enum):
Expand Down
2 changes: 1 addition & 1 deletion lambdas/models/document_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class DocumentReference(BaseModel):

uploaded: bool = Field(alias=str(DocumentReferenceMetadataFields.UPLOADED.value))
uploading: bool = Field(alias=str(DocumentReferenceMetadataFields.UPLOADING.value))
last_updated: str = Field(
last_updated: int = Field(
alias=str(DocumentReferenceMetadataFields.LAST_UPDATED.value),
serialization_alias="lastUpdated",
)
Expand Down
23 changes: 6 additions & 17 deletions lambdas/services/base/dynamo_service.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import boto3
from boto3.dynamodb.conditions import Key
from boto3.dynamodb.conditions import Attr, ConditionBase, Key
from botocore.exceptions import ClientError
from utils.audit_logging_setup import LoggingService
from utils.dynamo_utils import (
create_attribute_filter,
create_expression_attribute_values,
create_expressions,
create_update_expression,
Expand Down Expand Up @@ -40,8 +39,8 @@ def query_with_requested_fields(
index_name,
search_key,
search_condition: str,
requested_fields: list = None,
filtered_fields: dict = None,
requested_fields: list[str] = None,
query_filter: Attr | ConditionBase = None,
):
try:
table = self.get_table(table_name)
Expand All @@ -52,29 +51,19 @@ def query_with_requested_fields(
"Unable to query DynamoDB with empty requested fields"
)

projection_expression, expression_attribute_names = create_expressions(
requested_fields
)
projection_expression = ",".join(requested_fields)

if not filtered_fields:
if not query_filter:
results = table.query(
IndexName=index_name,
KeyConditionExpression=Key(search_key).eq(search_condition),
ExpressionAttributeNames=expression_attribute_names,
ProjectionExpression=projection_expression,
)
else:
fields_filter = create_attribute_filter(filtered_fields)
expression_attribute_values = create_expression_attribute_values(
filtered_fields
)

results = table.query(
IndexName=index_name,
KeyConditionExpression=Key(search_key).eq(search_condition),
FilterExpression=fields_filter,
ExpressionAttributeNames=expression_attribute_names,
ExpressionAttributeValues=expression_attribute_values,
FilterExpression=query_filter,
ProjectionExpression=projection_expression,
)

Expand Down
14 changes: 12 additions & 2 deletions lambdas/services/document_reference_search_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from json import JSONDecodeError

from botocore.exceptions import ClientError
from enums.dynamo_filter import AttributeOperator
from enums.lambda_error import LambdaError
from enums.metadata_field_names import DocumentReferenceMetadataFields
from models.document_reference import DocumentReference
from pydantic import ValidationError
from services.document_service import DocumentService
from utils.audit_logging_setup import LoggingService
from utils.dynamo_query_filter_builder import DynamoQueryFilterBuilder
from utils.exceptions import DynamoServiceException
from utils.lambda_exceptions import DocumentRefSearchException

Expand All @@ -19,16 +21,24 @@ class DocumentReferenceSearchService(DocumentService):
def get_document_references(self, nhs_number: str):
try:
list_of_table_names = json.loads(os.environ["DYNAMODB_TABLE_LIST"])

results: list[dict] = []

filter_builder = DynamoQueryFilterBuilder()
delete_filter_expression = filter_builder.add_condition(
attribute=str(DocumentReferenceMetadataFields.DELETED.value),
attr_operator=AttributeOperator.EQUAL,
filter_value="",
).build()

for table_name in list_of_table_names:
logger.info(f"Searching for results in {table_name}")

documents: list[
DocumentReference
] = self.fetch_documents_from_table_with_filter(
nhs_number,
table_name,
attr_filter={DocumentReferenceMetadataFields.DELETED.value: ""},
query_filter=delete_filter_expression,
)

results.extend(
Expand Down
15 changes: 9 additions & 6 deletions lambdas/services/document_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime, timezone

from boto3.dynamodb.conditions import Attr, ConditionBase
from enums.metadata_field_names import DocumentReferenceMetadataFields
from enums.s3_lifecycle_tags import S3LifecycleDays, S3LifecycleTags
from enums.supported_document_types import SupportedDocumentTypes
Expand All @@ -17,21 +18,23 @@ def __init__(self):
self.dynamo_service = DynamoDBService()

def fetch_available_document_references_by_type(
self, nhs_number: str, doc_type: SupportedDocumentTypes
self,
nhs_number: str,
doc_type: SupportedDocumentTypes,
query_filter: Attr | ConditionBase,
) -> list[DocumentReference]:
results: list[DocumentReference] = []
delete_filter = {DocumentReferenceMetadataFields.DELETED.value: ""}

doc_type_table = doc_type.get_dynamodb_table_name()
if isinstance(doc_type_table, list):
for table in doc_type_table:
results += self.fetch_documents_from_table_with_filter(
nhs_number, table, attr_filter=delete_filter
nhs_number, table, query_filter=query_filter
)
return results

return self.fetch_documents_from_table_with_filter(
nhs_number, doc_type_table, attr_filter=delete_filter
nhs_number, doc_type_table, query_filter=query_filter
)

def fetch_documents_from_table(
Expand All @@ -52,7 +55,7 @@ def fetch_documents_from_table(
return documents

def fetch_documents_from_table_with_filter(
self, nhs_number: str, table: str, attr_filter: dict
self, nhs_number: str, table: str, query_filter: Attr | ConditionBase
) -> list[DocumentReference]:
documents = []

Expand All @@ -62,7 +65,7 @@ def fetch_documents_from_table_with_filter(
search_key="NhsNumber",
search_condition=nhs_number,
requested_fields=DocumentReferenceMetadataFields.list(),
filtered_fields=attr_filter,
query_filter=query_filter,
)

for item in response["Items"]:
Expand Down
4 changes: 1 addition & 3 deletions lambdas/tests/unit/enums/test_metadata_field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,14 @@ def test_can_get_one_field_name():

def test_returns_all_as_list():
subject = DocumentReferenceMetadataFields.list()
assert len(subject) == 14
assert len(subject) == 12
assert DocumentReferenceMetadataFields.ID.value in subject
assert DocumentReferenceMetadataFields.CONTENT_TYPE.value in subject
assert DocumentReferenceMetadataFields.CREATED.value in subject
assert DocumentReferenceMetadataFields.DELETED.value in subject
assert DocumentReferenceMetadataFields.FILE_NAME.value in subject
assert DocumentReferenceMetadataFields.FILE_LOCATION.value in subject
assert DocumentReferenceMetadataFields.NHS_NUMBER.value in subject
assert DocumentReferenceMetadataFields.TYPE.value in subject
assert DocumentReferenceMetadataFields.TTL.value in subject
assert DocumentReferenceMetadataFields.VIRUS_SCANNER_RESULT.value in subject
assert DocumentReferenceMetadataFields.CURRENT_GP_ODS.value in subject
assert DocumentReferenceMetadataFields.UPLOADED.value in subject
Expand Down
12 changes: 6 additions & 6 deletions lambdas/tests/unit/helpers/data/dynamo_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
{
"ID": "3d8683b9-1665-40d2-8499-6e8302d507ff",
"ContentType": "type",
"Created": "2023-08-23T13:38:04.095Z",
"Created": "2024-01-01T12:00:00.000Z",
"Deleted": "",
"FileLocation": "s3://test-s3-bucket/9000000009/test-key-123",
"FileName": "document.csv",
Expand All @@ -12,12 +12,12 @@
"CurrentGpOds": "Y12345",
"Uploaded": "True",
"Uploading": "False",
"LastUpdated": "2023-08-23T13:38:04.095Z",
"LastUpdated": 1704110400, # Timestamp: 2024-01-01T12:00:00
},
{
"ID": "4d8683b9-1665-40d2-8499-6e8302d507ff",
"ContentType": "type",
"Created": "2023-08-23T13:38:04.095Z",
"Created": "2024-01-01T12:00:00.000Z",
"Deleted": "",
"FileLocation": "s3://test-s3-bucket/9000000009/test-key-223",
"FileName": "results.pdf",
Expand All @@ -26,12 +26,12 @@
"CurrentGpOds": "Y12345",
"Uploaded": "True",
"Uploading": "False",
"LastUpdated": "2023-08-23T13:38:04.095Z",
"LastUpdated": 1704110400, # Timestamp: 2024-01-01T12:00:00
},
{
"ID": "5d8683b9-1665-40d2-8499-6e8302d507ff",
"ContentType": "type",
"Created": "2023-08-24T14:38:04.095Z",
"Created": "2024-01-01T12:00:00.000Z",
"Deleted": "",
"FileLocation": "s3://test-s3-bucket/9000000009/test-key-323",
"FileName": "output.csv",
Expand All @@ -40,7 +40,7 @@
"CurrentGpOds": "Y12345",
"Uploaded": "True",
"Uploading": "False",
"LastUpdated": "2023-08-23T13:38:04.095Z",
"LastUpdated": 1704110400, # Timestamp: 2024-01-01T12:00:00
},
],
"Count": 3,
Expand Down
37 changes: 19 additions & 18 deletions lambdas/tests/unit/services/base/test_dynamo_service.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import pytest
from boto3.dynamodb.conditions import Key
from boto3.dynamodb.conditions import Attr, Key
from botocore.exceptions import ClientError
from enums.dynamo_filter import AttributeOperator
from enums.metadata_field_names import DocumentReferenceMetadataFields
from services.base.dynamo_service import DynamoDBService
from tests.unit.conftest import MOCK_TABLE_NAME, TEST_NHS_NUMBER
from tests.unit.helpers.data.dynamo_responses import MOCK_SEARCH_RESPONSE
from utils.dynamo_query_filter_builder import DynamoQueryFilterBuilder
from utils.exceptions import DynamoServiceException

MOCK_CLIENT_ERROR = ClientError(
Expand All @@ -30,15 +32,22 @@ def mock_table(mocker, mock_service):
yield mocker.patch.object(mock_service, "get_table")


@pytest.fixture
def mock_filter_expression():
    """Provide a filter expression matching items whose Deleted field is "".

    The expression is produced by DynamoQueryFilterBuilder from a single
    EQUAL condition on the ``Deleted`` metadata field.
    """
    yield (
        DynamoQueryFilterBuilder()
        .add_condition(
            attribute=str(DocumentReferenceMetadataFields.DELETED.value),
            attr_operator=AttributeOperator.EQUAL,
            filter_value="",
        )
        .build()
    )


def test_query_with_requested_fields_returns_items_from_dynamo(
mock_service, mock_table
):
search_key_obj = Key("NhsNumber").eq(TEST_NHS_NUMBER)
expected_projection = "#FileName_attr,#Created_attr"
expected_expr_attr_names = {
"#FileName_attr": "FileName",
"#Created_attr": "Created",
}
expected_projection = "FileName,Created"

mock_table.return_value.query.return_value = MOCK_SEARCH_RESPONSE
expected = MOCK_SEARCH_RESPONSE
Expand All @@ -58,24 +67,18 @@ def test_query_with_requested_fields_returns_items_from_dynamo(
mock_table.return_value.query.assert_called_once_with(
IndexName="NhsNumberIndex",
KeyConditionExpression=search_key_obj,
ExpressionAttributeNames=expected_expr_attr_names,
ProjectionExpression=expected_projection,
)

assert expected == actual


def test_query_with_requested_fields_with_filter_returns_items_from_dynamo(
mock_service, mock_table
mock_service, mock_table, mock_filter_expression
):
search_key_obj = Key("NhsNumber").eq(TEST_NHS_NUMBER)
expected_projection = "#FileName_attr,#Created_attr"
expected_expr_attr_names = {
"#FileName_attr": "FileName",
"#Created_attr": "Created",
}
expected_filter = "attribute_not_exists(Deleted) OR Deleted = :Deleted_val"
expected_attributes_values = {":Deleted_val": ""}
expected_projection = "FileName,Created"
expected_filter = Attr("Deleted").eq("")

mock_table.return_value.query.return_value = MOCK_SEARCH_RESPONSE
expected = MOCK_SEARCH_RESPONSE
Expand All @@ -89,17 +92,15 @@ def test_query_with_requested_fields_with_filter_returns_items_from_dynamo(
DocumentReferenceMetadataFields.FILE_NAME.value,
DocumentReferenceMetadataFields.CREATED.value,
],
filtered_fields={DocumentReferenceMetadataFields.DELETED.value: ""},
query_filter=mock_filter_expression,
)

mock_table.assert_called_with(MOCK_TABLE_NAME)
mock_table.return_value.query.assert_called_once_with(
IndexName="NhsNumberIndex",
KeyConditionExpression=search_key_obj,
ExpressionAttributeNames=expected_expr_attr_names,
ProjectionExpression=expected_projection,
FilterExpression=expected_filter,
ExpressionAttributeValues=expected_attributes_values,
)

assert expected == actual
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
]

EXPECTED_RESPONSE = {
"created": "2023-08-23T13:38:04.095Z",
"created": "2024-01-01T12:00:00.000Z",
"fileName": "document.csv",
"virusScannerResult": "Clean",
}
Expand Down
Loading

0 comments on commit c668169

Please sign in to comment.