Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SES and SQS client #9

Merged
merged 6 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 118 additions & 118 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions dsc/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class InvalidSQSMessageError(Exception):
    """Raised when an SQS message fails validation."""
Empty file added dsc/utilities/__init__.py
Empty file.
Empty file added dsc/utilities/aws/__init__.py
Empty file.
File renamed without changes.
84 changes: 84 additions & 0 deletions dsc/utilities/aws/ses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from __future__ import annotations

import logging
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from typing import TYPE_CHECKING

from boto3 import client

if TYPE_CHECKING:
from mypy_boto3_ses.type_defs import SendRawEmailResponseTypeDef

logger = logging.getLogger(__name__)


class SESClient:
    """A class to perform common SES operations for this application."""

    def __init__(self, region: str) -> None:
        """Create the underlying boto3 SES client.

        Args:
            region: The AWS region passed to boto3 as ``region_name``.
        """
        self.client = client("ses", region_name=region)

    def create_and_send_email(
        self,
        subject: str,
        attachment_content: str,
        attachment_name: str,
        source_email_address: str,
        recipient_email_address: str,
    ) -> None:
        """Create an email message and send it via SES.

        Args:
            subject: The subject of the email.
            attachment_content: The content of the email attachment.
            attachment_name: The name of the email attachment.
            source_email_address: The email address of the sender.
            recipient_email_address: The email address of the recipient.
        """
        message = self._create_email(subject, attachment_content, attachment_name)
        self._send_email(source_email_address, recipient_email_address, message)
        logger.debug(f"Logs sent to {recipient_email_address}")

    def _create_email(
        self,
        subject: str,
        attachment_content: str,
        attachment_name: str,
    ) -> MIMEMultipart:
        """Create a multipart email carrying a single attachment.

        Args:
            subject: The subject of the email.
            attachment_content: The content of the email attachment.
            attachment_name: The name of the email attachment.

        Returns:
            The multipart message with its "Subject" header set and the
            attachment added as an application part.
        """
        message = MIMEMultipart()
        message["Subject"] = subject
        # MIMEApplication expects bytes. Given a non-ASCII str, the email
        # package falls back to a raw-unicode-escape conversion, which corrupts
        # the attachment text, so encode explicitly as UTF-8 first.
        if isinstance(attachment_content, str):
            attachment_bytes = attachment_content.encode("utf-8")
        else:
            attachment_bytes = attachment_content
        attachment_object = MIMEApplication(attachment_bytes)
        attachment_object.add_header(
            "Content-Disposition", "attachment", filename=attachment_name
        )
        message.attach(attachment_object)
        return message

    def _send_email(
        self,
        source_email_address: str,
        recipient_email_address: str,
        message: MIMEMultipart,
    ) -> SendRawEmailResponseTypeDef:
        """Send email via SES.

        Args:
            source_email_address: The email address of the sender.
            recipient_email_address: The email address of the recipient.
            message: The message to be sent.

        Returns:
            The response returned by the client's ``send_raw_email`` call.
        """
        return self.client.send_raw_email(
            Source=source_email_address,
            Destinations=[recipient_email_address],
            RawMessage={
                "Data": message.as_string(),
            },
        )
215 changes: 215 additions & 0 deletions dsc/utilities/aws/sqs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
from __future__ import annotations

import json
import logging
from typing import TYPE_CHECKING, Any

from boto3 import client

from dsc.exceptions import InvalidSQSMessageError

if TYPE_CHECKING:
from collections.abc import Iterator, Mapping

from mypy_boto3_sqs.type_defs import (
EmptyResponseMetadataTypeDef,
MessageAttributeValueTypeDef,
MessageTypeDef,
SendMessageResultTypeDef,
)

logger = logging.getLogger(__name__)


class SQSClient:
"""A class to perform common SQS operations for this application."""

def __init__(
    self, region: str, queue_name: str, queue_url: str | None = None
) -> None:
    """Create the boto3 SQS client and remember which queue to use.

    Args:
        region: The AWS region passed to boto3 as ``region_name``.
        queue_name: The name of the SQS queue.
        queue_url: Optional queue URL, stored as the initial ``_queue_url``.
    """
    sqs_client = client("sqs", region_name=region)
    self.client = sqs_client
    self.queue_name = queue_name
    self._queue_url: str | None = queue_url

@property
def queue_url(self) -> str:
    """Return the queue URL, looking it up once and caching it."""
    if self._queue_url:
        return self._queue_url
    # No usable cached value yet: resolve it and keep it for later calls.
    self._queue_url = self.get_queue_url()
    return self._queue_url

def get_queue_url(self) -> str:
    """Ask the SQS client for the URL of the queue named ``self.queue_name``."""
    lookup = self.client.get_queue_url(QueueName=self.queue_name)
    return lookup["QueueUrl"]

@staticmethod
def create_dss_message_attributes(
    package_id: str, submission_source: str, output_queue: str
) -> dict[str, Any]:
    """Build the message attributes for a DSpace Submission Service message.

    Args:
        package_id: The PackageID field which is populated by the submission's
            identifier.
        submission_source: The source for the submission.
        output_queue: The SQS output queue used for retrieving result messages.

    Returns:
        A dict with the keys "PackageID", "SubmissionSource" and "OutputQueue",
        each mapped to a String-typed attribute value.
    """
    values_by_attribute = (
        ("PackageID", package_id),
        ("SubmissionSource", submission_source),
        ("OutputQueue", output_queue),
    )
    return {
        attribute: {"DataType": "String", "StringValue": value}
        for attribute, value in values_by_attribute
    }

@staticmethod
def create_dss_message_body(
    submission_system: str,
    collection_handle: str,
    metadata_s3_uri: str,
    bitstream_file_name: str,
    bitstream_s3_uri: str,
) -> str:
    """Build the JSON body for a DSpace Submission Service message.

    Args:
        submission_system: The system where the article is uploaded.
        collection_handle: The handle of collection where the article is uploaded.
        metadata_s3_uri: The S3 URI for the metadata JSON file.
        bitstream_file_name: The file name for the article content which is
            uploaded as a bitstream.
        bitstream_s3_uri: The S3 URI for the article content file.

    Returns:
        The message body serialized as a JSON string.
    """
    # A single bitstream entry is sent; its description is always null.
    bitstream = {
        "BitstreamName": bitstream_file_name,
        "FileLocation": bitstream_s3_uri,
        "BitstreamDescription": None,
    }
    payload = {
        "SubmissionSystem": submission_system,
        "CollectionHandle": collection_handle,
        "MetadataLocation": metadata_s3_uri,
        "Files": [bitstream],
    }
    return json.dumps(payload)

def delete(self, receipt_handle: str) -> EmptyResponseMetadataTypeDef:
    """Delete message from SQS queue.

    Args:
        receipt_handle: The receipt handle of the message to be deleted.

    Returns:
        The response returned by the client's ``delete_message`` call.
    """
    # This must be an f-string; without the prefix the placeholders are logged
    # literally instead of the receipt handle and queue name.
    logger.debug(f"Deleting '{receipt_handle}' from SQS queue: {self.queue_name}")
    response = self.client.delete_message(
        QueueUrl=self.queue_url,
        ReceiptHandle=receipt_handle,
    )
    logger.debug(f"Message deleted from SQS queue: {response}")

    return response

def process_result_message(self, sqs_message: MessageTypeDef) -> tuple[str, str]:
    """Validate an SQS result message, pull out its data, then delete it.

    Args:
        sqs_message: An SQS result message to be processed.

    Returns:
        A pair of the "PackageID" attribute's string value and the
        JSON-decoded message body.

    Raises:
        InvalidSQSMessageError: If ``validate_message`` rejects the message.
    """
    is_valid = self.validate_message(sqs_message)
    if not is_valid:
        raise InvalidSQSMessageError

    package_attribute = sqs_message["MessageAttributes"]["PackageID"]
    package_id = package_attribute["StringValue"]
    decoded_body = json.loads(str(sqs_message["Body"]))

    # The message is deleted only after its contents were read successfully.
    self.delete(sqs_message["ReceiptHandle"])
    return package_id, decoded_body

def receive(self) -> Iterator[MessageTypeDef]:
    """Yield messages from the SQS queue until a response carries none.

    Each request asks for up to 10 messages with all message attributes.
    Iteration ends at the first response that has no "Messages" key.
    """
    logger.debug(f"Receiving messages from SQS queue: {self.queue_name}")
    while True:
        batch = self.client.receive_message(
            QueueUrl=self.queue_url,
            MaxNumberOfMessages=10,
            MessageAttributeNames=["All"],
        )
        if "Messages" not in batch:
            logger.debug(f"No more messages from SQS queue: {self.queue_name}")
            return
        for sqs_message in batch["Messages"]:
            logger.debug(
                f"Message retrieved from SQS queue {self.queue_name}: {sqs_message}"
            )
            yield sqs_message

def send(
    self,
    message_attributes: Mapping[str, MessageAttributeValueTypeDef],
    message_body: str,
) -> SendMessageResultTypeDef:
    """Send a message to the SQS queue.

    Args:
        message_attributes: The attributes of the message to send.
        message_body: The body of the message to send.

    Returns:
        The response returned by the client's ``send_message`` call.
    """
    logger.debug(f"Sending message to SQS queue: {self.queue_name}")
    request = {
        "QueueUrl": self.queue_url,
        "MessageAttributes": message_attributes,
        "MessageBody": str(message_body),
    }
    result = self.client.send_message(**request)
    logger.debug(f"Response from SQS queue: {result}")
    return result

def validate_message(self, sqs_message: MessageTypeDef) -> bool:
Copy link

@ghukill ghukill Dec 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that I submitted an approval already, and I do think this current approach will successfully validate messages, but taking another pass at the updates I do wonder if there is an opportunity to streamline some of this validation even more. I would imagine that much of this application will be about validation, so some patterns to follow throughout could be helpful to establish early.

I see that all this validation is under the umbrella of this call to self.validate_message() in self.process_result_message():

if not self.validate_message(sqs_message):
    raise InvalidSQSMessageError

Where the ultimate goal is to a) log what happened, and b) raise InvalidSQSMessageError from this method.

What if each validation helper method was responsible for directly raising InvalidSQSMessageError if they found something wrong? and self.validate_message() is just an orchestrator of calling these methods? Something along the lines of:

def self.validate_message() -> None:
    # call all validators here
    # if all pass, return None, and all is well!
    # if any raise an exception, it will bubble up from here and reach self.process_result_message()
    #   where this was originally called from

def validate_message_attributes() -> None:
   # each validator returns None
   # do checks, raise InvalidSQSMessageError with a meaningful message if something wrong

# continue to define validators as needed

I think this leans into the EAFP (Easier to Ask for Forgiveness than Permission) python pattern. If we call self.validate_message() and nothing happens, great. But if any of those validator methods raise that InvalidSQSMessageError exception, it will:

  1. get raised from that lowest validator method
  2. bubble up through validate_message()
  3. bubble up from process_result_message() here just as it does now

This pattern is extensible, because you can just keep calling validator methods, knowing they are responsible for raising that exception.

To reiterate: with only two validations here, I think the current approach works as-is. But I have this hunch that this DSC app will be validation heavy, and a validation pattern that is applied somewhat globally could be helpful.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like @ghukill 's proposal to have validate_message simply call validation methods that are responsible for raising InvalidSQSMessageError with meaningful messages.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, this is definitely a better approach, I greatly appreciate the suggestion and I'll rework!

"""Validate that an SQS message is formatted as expected.

Args:
sqs_message: An SQS message to be evaluated.

"""
valid = False
if not sqs_message.get("ReceiptHandle"):
logger.exception(
f"Failed to retrieve 'ReceiptHandle' from message: {sqs_message}"
)
elif self.validate_message_attributes(
sqs_message=sqs_message
) and self.validate_message_body(sqs_message=sqs_message):
valid = True
return valid

@staticmethod
def validate_message_attributes(sqs_message: MessageTypeDef) -> bool:
    """Validate that "MessageAttributes" field is formatted as expected.

    The message is valid when "MessageAttributes" contains a "PackageID"
    attribute with a non-empty "StringValue".

    Args:
        sqs_message: An SQS message to be evaluated.

    Returns:
        True when the attributes are valid, False otherwise. A failure is
        logged at error level.
    """
    attributes = sqs_message.get("MessageAttributes")
    # Look up the exact "PackageID" key. A substring match on attribute names
    # would accept e.g. "PackageIDs" and then fail on the real lookup.
    package_id = None
    if isinstance(attributes, dict):
        package_id = attributes.get("PackageID")
    if isinstance(package_id, dict) and package_id.get("StringValue"):
        return True
    # No exception is being handled here, so log with error(), not exception().
    logger.error(f"Failed to parse SQS message attributes: {sqs_message}")
    return False

@staticmethod
def validate_message_body(sqs_message: MessageTypeDef) -> bool:
    """Validate that "Body" field is formatted as expected.

    The body is valid when it is present, parses as JSON, and the parsed
    value is truthy (so an empty object or list is rejected).

    Args:
        sqs_message: An SQS message to be evaluated.

    Returns:
        True when the body is valid, False otherwise. A failure is logged.
    """
    if "Body" in sqs_message:
        try:
            parsed_body = json.loads(str(sqs_message["Body"]))
        except json.JSONDecodeError:
            # Malformed JSON is a validation failure, not a crash. The
            # traceback is logged because an exception is being handled.
            logger.exception(f"Failed to parse SQS message body: {sqs_message}")
            return False
        if parsed_body:
            return True
    # No exception is being handled here, so log with error(), not exception().
    logger.error(f"Failed to parse SQS message body: {sqs_message}")
    return False
Loading
Loading