Skip to content

Commit

Permalink
PRMP 1036 - Fix PDF Intermittence
Browse files Browse the repository at this point in the history
  • Loading branch information
RioKnightleyNHS authored Oct 31, 2024
1 parent 9d9cb03 commit 5271e16
Show file tree
Hide file tree
Showing 10 changed files with 413 additions and 236 deletions.
10 changes: 3 additions & 7 deletions app/src/helpers/requests/getLloydGeorgeRecord.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,8 @@ export const pollForPresignedUrl = async ({
if (data.jobStatus === JOB_STATUS.COMPLETED && !data.presignedUrl.startsWith('https://')) {
return Promise.reject({ response: { status: 500 } });
}

return {
...data,
presignedUrl: `${data.presignedUrl}&origin=${
typeof window !== 'undefined' ? window.location.href : ''
}`,
};
const result: LloydGeorgeStitchResult = data;
return result;
};

export default getLloydGeorgeRecord;
19 changes: 18 additions & 1 deletion lambdas/enums/lambda_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,12 +396,29 @@ def to_str(self) -> str:
"""
# CloudFront edge errors (CE_5xxx: request could not be processed).
EdgeMalformed = {
    "err_code": "CE_5001",
    # Single "message" entry: a repeated key in a dict literal silently keeps
    # only the last value, so the superseded text was dead.
    "message": "Malformed cloudfront request",
}

EdgeNoOrigin = {
    "err_code": "CE_5002",
    "message": "The request is missing an origin",
}

EdgeNoQuery = {
    "err_code": "CE_5003",
    "message": "The request is missing a querystring",
}

EdgeRequiredQuery = {
    "err_code": "CE_5004",
    "message": "Missing required querystring values",
}

EdgeRequiredHeaders = {
    "err_code": "CE_5005",
    "message": "Malformed header structure or missing data",
}

# CE_4xxx: the presigned reference could not be used.
EdgeNoClient = {"err_code": "CE_4001", "message": "Document not found"}

"""
Expand Down
48 changes: 11 additions & 37 deletions lambdas/handlers/edge_presign_handler.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
import hashlib
import json
import logging
from urllib.parse import parse_qs

from enums.lambda_error import LambdaError
from services.edge_presign_service import EdgePresignService
from utils.decorators.handle_edge_exceptions import handle_edge_exceptions
from utils.decorators.override_error_check import override_error_check
from utils.decorators.set_audit_arg import set_request_context_for_logging
from utils.lambda_exceptions import CloudFrontEdgeException
from utils.decorators.validate_s3_request import validate_s3_request

logger = logging.getLogger()
logger.setLevel(logging.INFO)
Expand All @@ -17,40 +13,18 @@
@set_request_context_for_logging
@override_error_check
@handle_edge_exceptions
@validate_s3_request
def lambda_handler(event, context):
    """Prepare a CloudFront request so it can be forwarded to the S3 origin.

    The request is read from the first CloudFront record of ``event``. Its
    presigned URL is registered through ``EdgePresignService`` and its headers
    are rewritten for the S3 origin before it is returned.

    Args:
        event: CloudFront event; the request is read from
            ``event["Records"][0]["cf"]["request"]``.
        context: Lambda context object (not used by this handler).

    Returns:
        The request dict returned by ``EdgePresignService.update_s3_headers``.
    """
    # NOTE(review): presumably ``validate_s3_request`` has already rejected
    # events without this structure - confirm against the decorator.
    request: dict = event["Records"][0]["cf"]["request"]
    logger.info("Edge received S3 request")

    edge_presign_service = EdgePresignService()
    request_values: dict = edge_presign_service.filter_request_values(request)
    # Registers the presigned URL; raises CloudFrontEdgeException on failure.
    edge_presign_service.use_presign(request_values)

    forwarded_request: dict = edge_presign_service.update_s3_headers(
        request, request_values
    )

    logger.info("Edge forwarding S3 request")
    return forwarded_request
61 changes: 53 additions & 8 deletions lambdas/services/edge_presign_service.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import hashlib
import re

from botocore.exceptions import ClientError
Expand All @@ -12,23 +13,37 @@


class EdgePresignService:

def __init__(self):
    """Create the DynamoDB, S3 and SSM service helpers used by this service."""
    # Name of the SSM parameter that holds the base reference-table name.
    self.table_name_ssm_param = "EDGE_REFERENCE_TABLE"
    self.dynamo_service = DynamoDBService()
    self.s3_service = S3Service()
    self.ssm_service = SSMService()

def attempt_url_update(self, uri_hash, origin_url) -> None:
def use_presign(self, request_values: dict) -> None:
    """Hash the presigned URL of a request and hand it to the ingestion step.

    Args:
        request_values: Mapping with the keys ``"uri"``, ``"querystring"`` and
            ``"domain_name"``.

    Raises:
        KeyError: If one of the three keys is missing.
    """
    uri: str = request_values["uri"]
    querystring: str = request_values["querystring"]
    domain_name: str = request_values["domain_name"]

    presign_string: str = f"{uri}?{querystring}"
    # ``str.encode`` returns bytes, which is what hashlib expects.
    encoded_presign_string: bytes = presign_string.encode("utf-8")
    presign_credentials_hash: str = hashlib.md5(encoded_presign_string).hexdigest()

    self.attempt_presign_ingestion(
        uri_hash=presign_credentials_hash,
        domain_name=domain_name,
    )

def attempt_presign_ingestion(self, uri_hash: str, domain_name: str) -> None:
try:
environment = self.extract_environment_from_url(origin_url)
environment = self.filter_domain_for_env(domain_name)
logger.info(f"Environment found: {environment}")
base_table_name: str = self.ssm_service.get_ssm_parameter(
self.table_name_ssm_param
)
formatted_table_name: str = self.extend_table_name(
base_table_name, environment
)

logger.info(f"Table: {formatted_table_name}")
self.dynamo_service.update_item(
table_name=formatted_table_name,
key=uri_hash,
Expand All @@ -40,13 +55,43 @@ def attempt_url_update(self, uri_hash, origin_url) -> None:
logger.error(f"{str(e)}", {"Result": LambdaError.EdgeNoClient.to_str()})
raise CloudFrontEdgeException(400, LambdaError.EdgeNoClient)

def extract_environment_from_url(self, url: str) -> str:
match = re.search(r"https://([^.]+)\.[^.]+\.[^.]+\.[^.]+", url)
@staticmethod
def update_s3_headers(request: dict, request_values: dict):
domain_name = request_values["domain_name"]
if "authorization" in request["headers"]:
del request["headers"]["authorization"]
request["headers"]["host"] = [{"key": "Host", "value": domain_name}]

return request

@staticmethod
def filter_request_values(request: dict) -> dict:
try:
uri: str = request["uri"]
querystring: str = request["querystring"]
headers: dict = request["headers"]
origin: str = request.get("origin", {})
domain_name: str = origin["s3"]["domainName"]
except KeyError as e:
logger.error(f"Missing request component: {str(e)}")
raise CloudFrontEdgeException(500, LambdaError.EdgeNoOrigin)

return {
"uri": uri,
"querystring": querystring,
"headers": headers,
"domain_name": domain_name,
}

@staticmethod
def filter_domain_for_env(domain_name: str) -> str:
match = re.match(r"^[^-]+(?:-[^-]+)?(?=-lloyd)", domain_name)
if match:
return match.group(1)
return match.group(0)
return ""

def extend_table_name(self, base_table_name, environment) -> str:
@staticmethod
def extend_table_name(base_table_name: str, environment: str) -> str:
if environment:
return f"{environment}_{base_table_name}"
return base_table_name
5 changes: 3 additions & 2 deletions lambdas/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@

REGION_NAME = "eu-west-2"

MOCK_TABLE_NAME = "test-table"
MOCK_BUCKET = "test-s3-bucket"

# Defined once; a second identical assignment only shadowed the first.
MOCK_CLOUDFRONT_URL = "test-cloudfront-url.com"
# Mocked bucket domain of the form "<env>-lloyd-...".
MOCKED_LG_BUCKET_ENV = "test"
MOCKED_LG_BUCKET_URL = f"{MOCKED_LG_BUCKET_ENV}-lloyd-test-test.com"
MOCK_ARF_TABLE_NAME_ENV_NAME = "DOCUMENT_STORE_DYNAMODB_NAME"
MOCK_ARF_BUCKET_ENV_NAME = "DOCUMENT_STORE_BUCKET_NAME"

Expand Down
80 changes: 40 additions & 40 deletions lambdas/tests/unit/enums/test_edge_presign_values.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,58 @@
# test_enums.py

from enums.lambda_error import LambdaError
from tests.unit.conftest import MOCKED_LG_BUCKET_URL

# NOTE(review): ENV and TABLE_NAME are not referenced in the visible part of
# this module - confirm they are still needed.
ENV = "test"
# Query string carrying all seven X-Amz-* presign parameters.
MOCKED_AUTH_QUERY = (
"X-Amz-Algorithm=algo&X-Amz-Credential=cred&X-Amz-Date=date"
"&X-Amz-Expires=3600&X-Amz-SignedHeaders=signed"
"&X-Amz-Signature=sig&X-Amz-Security-Token=token"
)
# Same query string cut off after X-Amz-Expires (no SignedHeaders, Signature
# or Security-Token).
MOCKED_PARTIAL_QUERY = (
"X-Amz-Algorithm=algo&X-Amz-Credential=cred&X-Amz-Date=date" "&X-Amz-Expires=3600"
)

TABLE_NAME = "CloudFrontEdgeReference"
# Header map keyed by lower-cased header name; each value is a list of
# {"key", "value"} entries. Host is set to the mocked bucket domain.
MOCKED_HEADERS = {
"cloudfront-viewer-country": [{"key": "CloudFront-Viewer-Country", "value": "US"}],
"x-forwarded-for": [{"key": "X-Forwarded-For", "value": "1.2.3.4"}],
"host": [{"key": "Host", "value": MOCKED_LG_BUCKET_URL}],
}

NHS_DOMAIN = "example.gov.uk"
# Expected message / error-code values, read from the LambdaError members.
EXPECTED_EDGE_NO_QUERY_MESSAGE = LambdaError.EdgeNoQuery.value["message"]
EXPECTED_EDGE_NO_QUERY_ERROR_CODE = LambdaError.EdgeNoQuery.value["err_code"]
EXPECTED_EDGE_MALFORMED_QUERY_MESSAGE = LambdaError.EdgeRequiredQuery.value["message"]
EXPECTED_EDGE_MALFORMED_QUERY_ERROR_CODE = LambdaError.EdgeRequiredQuery.value[
"err_code"
]
EXPECTED_EDGE_MALFORMED_HEADER_MESSAGE = LambdaError.EdgeRequiredHeaders.value[
"message"
]
EXPECTED_EDGE_MALFORMED_HEADER_ERROR_CODE = LambdaError.EdgeRequiredHeaders.value[
"err_code"
]
EXPECTED_EDGE_NO_ORIGIN_ERROR_MESSAGE = LambdaError.EdgeNoOrigin.value["message"]
EXPECTED_EDGE_NO_ORIGIN_ERROR_CODE = LambdaError.EdgeNoOrigin.value["err_code"]

EXPECTED_EDGE_NO_CLIENT_ERROR_MESSAGE = LambdaError.EdgeNoClient.value["message"]

EXPECTED_EDGE_NO_CLIENT_ERROR_CODE = LambdaError.EdgeNoClient.value["err_code"]
EXPECTED_EDGE_MALFORMED_ERROR_MESSAGE = LambdaError.EdgeMalformed.value["message"]
EXPECTED_EDGE_MALFORMED_ERROR_CODE = LambdaError.EdgeMalformed.value["err_code"]

# NOTE(review): presumably compared against the arguments of the DynamoDB
# update call made by the edge presign service - confirm against the tests.
EXPECTED_DYNAMO_DB_CONDITION_EXPRESSION = (
"attribute_not_exists(IsRequested) OR IsRequested = :false"
)
EXPECTED_DYNAMO_DB_EXPRESSION_ATTRIBUTE_VALUES = {":false": False}

# Same string as EdgePresignService.table_name_ssm_param.
EXPECTED_SSM_PARAMETER_KEY = "EDGE_REFERENCE_TABLE"

EXPECTED_SUCCESS_RESPONSE = None

VALID_EVENT_MODEL = {
MOCK_S3_EDGE_EVENT = {
"Records": [
{
"cf": {
"request": {
"headers": {
"authorization": [
{"key": "Authorization", "value": "Bearer token"}
],
"host": [{"key": "Host", "value": NHS_DOMAIN}],
},
"querystring": f"origin=https://test.{NHS_DOMAIN}&other=param",
"headers": MOCKED_HEADERS,
"querystring": MOCKED_AUTH_QUERY,
"uri": "/some/path",
}
}
}
]
}

MISSING_ORIGIN_EVENT_MODEL = {
"Records": [
{
"cf": {
"request": {
"headers": {
"authorization": [
{"key": "Authorization", "value": "Bearer token"}
],
"host": [{"key": "Host", "value": NHS_DOMAIN}],
"origin": {
"s3": {
"authMethod": "none",
"customHeaders": {},
"domainName": MOCKED_LG_BUCKET_URL,
"path": "",
}
},
"querystring": "other=param",
"uri": "/some/path",
}
}
}
Expand Down
Loading

0 comments on commit 5271e16

Please sign in to comment.