Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SES and SQS client #9

Merged
merged 6 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 121 additions & 121 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions dsc/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class InvalidSQSMessageError(Exception):
pass
Empty file added dsc/utilities/__init__.py
Empty file.
Empty file added dsc/utilities/aws/__init__.py
Empty file.
File renamed without changes.
84 changes: 84 additions & 0 deletions dsc/utilities/aws/ses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from __future__ import annotations

import logging
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from typing import TYPE_CHECKING

from boto3 import client

if TYPE_CHECKING:
from mypy_boto3_ses.type_defs import SendRawEmailResponseTypeDef

logger = logging.getLogger(__name__)


class SESClient:
"""A class to perform common SES operations for this application."""

def __init__(self, region: str) -> None:
self.client = client("ses", region_name=region)

def create_and_send_email(
self,
subject: str,
attachment_content: str,
attachment_name: str,
source_email_address: str,
recipient_email_address: str,
) -> None:
"""Create an email message and send it via SES.

Args:
subject: The subject of the email.
attachment_content: The content of the email attachment.
attachment_name: The name of the email attachment.
source_email_address: The email address of the sender.
recipient_email_address: The email address of the receipient.
"""
message = self._create_email(subject, attachment_content, attachment_name)
self._send_email(source_email_address, recipient_email_address, message)
logger.debug(f"Logs sent to {recipient_email_address}")

def _create_email(
self,
subject: str,
attachment_content: str,
attachment_name: str,
) -> MIMEMultipart:
"""Create an email.

Args:
subject: The subject of the email.
attachment_content: The content of the email attachment.
attachment_name: The name of the email attachment.
"""
message = MIMEMultipart()
message["Subject"] = subject
attachment_object = MIMEApplication(attachment_content)
attachment_object.add_header(
"Content-Disposition", "attachment", filename=attachment_name
)
message.attach(attachment_object)
return message

def _send_email(
self,
source_email_address: str,
recipient_email_address: str,
message: MIMEMultipart,
) -> SendRawEmailResponseTypeDef:
"""Send email via SES.

Args:
source_email_address: The email address of the sender.
recipient_email_address: The email address of the receipient.
message: The message to be sent.
"""
return self.client.send_raw_email(
Source=source_email_address,
Destinations=[recipient_email_address],
RawMessage={
"Data": message.as_string(),
},
)
203 changes: 203 additions & 0 deletions dsc/utilities/aws/sqs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
from __future__ import annotations

import json
import logging
from typing import TYPE_CHECKING, Any

from boto3 import client

from dsc.exceptions import InvalidSQSMessageError

if TYPE_CHECKING:
from collections.abc import Iterator, Mapping

from mypy_boto3_sqs.type_defs import (
EmptyResponseMetadataTypeDef,
MessageAttributeValueTypeDef,
MessageTypeDef,
SendMessageResultTypeDef,
)

logger = logging.getLogger(__name__)


class SQSClient:
"""A class to perform common SQS operations for this application."""

def __init__(
self, region: str, queue_name: str, queue_url: str | None = None
) -> None:
self.client = client("sqs", region_name=region)
self.queue_name = queue_name
self._queue_url: str | None = queue_url

@property
def queue_url(self) -> str:
"""Property to provide QueueUrl, caching it for reuse."""
if not self._queue_url:
self._queue_url = self.get_queue_url()
return self._queue_url

def get_queue_url(self) -> str:
"""Get SQS queue URL from name."""
return self.client.get_queue_url(QueueName=self.queue_name)["QueueUrl"]

@staticmethod
def create_dss_message_attributes(
package_id: str, submission_source: str, output_queue: str
) -> dict[str, Any]:
"""Create attributes for a DSpace Submission Service message.

Args:
package_id: The PackageID field which is populated by the submission's
identifier.
submission_source: The source for the submission.
output_queue: The SQS output queue used for retrieving result messages.
"""
return {
"PackageID": {"DataType": "String", "StringValue": package_id},
"SubmissionSource": {"DataType": "String", "StringValue": submission_source},
"OutputQueue": {"DataType": "String", "StringValue": output_queue},
}

@staticmethod
def create_dss_message_body(
submission_system: str,
collection_handle: str,
metadata_s3_uri: str,
bitstream_file_name: str,
bitstream_s3_uri: str,
) -> str:
"""Create body for a DSpace Submission Service message.

Args:
submission_system: The system where the article is uploaded.
collection_handle: The handle of collection where the article is uploaded.
metadata_s3_uri: The S3 URI for the metadata JSON file.
bitstream_file_name: The file name for the article content which is uploaded as a
bitstream.
bitstream_s3_uri: The S3 URI for the article content file.
"""
return json.dumps(
{
"SubmissionSystem": submission_system,
"CollectionHandle": collection_handle,
"MetadataLocation": metadata_s3_uri,
"Files": [
{
"BitstreamName": bitstream_file_name,
"FileLocation": bitstream_s3_uri,
"BitstreamDescription": None,
}
],
}
)

def delete(self, receipt_handle: str) -> EmptyResponseMetadataTypeDef:
"""Delete message from SQS queue.

Args:
receipt_handle: The receipt handle of the message to be deleted.
"""
logger.debug("Deleting '{receipt_handle}' from SQS queue: {self.queue_name}")
response = self.client.delete_message(
QueueUrl=self.queue_url,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ReceiptHandle=receipt_handle,
)
logger.debug(f"Message deleted from SQS queue: {response}")

return response

def process_result_message(self, sqs_message: MessageTypeDef) -> tuple[str, str]:
"""Validate, extract data, and delete an SQS result message.

Args:
sqs_message: An SQS result message to be processed.
"""
self.validate_message(sqs_message)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔥

identifier = sqs_message["MessageAttributes"]["PackageID"]["StringValue"]
message_body = json.loads(str(sqs_message["Body"]))
self.delete(sqs_message["ReceiptHandle"])
return identifier, message_body

def receive(self) -> Iterator[MessageTypeDef]:
"""Receive messages from SQS queue."""
logger.debug(f"Receiving messages from SQS queue: {self.queue_name}")
while True:
response = self.client.receive_message(
QueueUrl=self.queue_url,
MaxNumberOfMessages=10,
MessageAttributeNames=["All"],
)
if "Messages" in response:
for message in response["Messages"]:
logger.debug(
f"Message retrieved from SQS queue {self.queue_name}: {message}"
)
yield message
else:
logger.debug(f"No more messages from SQS queue: {self.queue_name}")
break

def send(
self,
message_attributes: Mapping[str, MessageAttributeValueTypeDef],
message_body: str,
) -> SendMessageResultTypeDef:
"""Send message via SQS.

Args:
message_attributes: The attributes of the message to send.
message_body: The body of the message to send.
"""
logger.debug(f"Sending message to SQS queue: {self.queue_name}")
response = self.client.send_message(
QueueUrl=self.queue_url,
MessageAttributes=message_attributes,
MessageBody=str(message_body),
)
logger.debug(f"Response from SQS queue: {response}")
return response

def validate_message(self, sqs_message: MessageTypeDef) -> None:
"""Validate that an SQS message is formatted as expected.

Args:
sqs_message: An SQS message to be evaluated.

"""
if not sqs_message.get("ReceiptHandle"):
message = f"Failed to retrieve 'ReceiptHandle' from message: {sqs_message}"
raise InvalidSQSMessageError(message)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for not doing what I said I would in this Slack thread, but I think we previously discussed: (a) adding Ruff rule EM102 to ignore list and (b) allow passing of simple error messages to exceptions!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated!

self.validate_message_attributes(sqs_message=sqs_message)
self.validate_message_body(sqs_message=sqs_message)

@staticmethod
def validate_message_attributes(sqs_message: MessageTypeDef) -> None:
"""Validate that "MessageAttributes" field is formatted as expected.

Args:
sqs_message: An SQS message to be evaluated.
"""
if (
"MessageAttributes" not in sqs_message
or not any(
field
for field in sqs_message["MessageAttributes"]
if "PackageID" in field
)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I must admint, this one is still tripping me up. Are we looking for "PackageID" as a key in the sqs_message["MessageAttributes"] dictionary?

If so, could we rewrite?

or "PackageID" not in sqs_message["MessageAttributes"]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we are looking at the validation method even closer, I am curious about this as well. Debugging the test, it seems the an sqs_message looks like:

{
    "MessageId": "47179327-6a75-467d-b431-d7bfd802b15e",
    "ReceiptHandle": "zqaejnmrlfkzlxsxexmzsnyjsiugkotjdouweeephwuigifrlsrwxshfqmguyjttklxxrogfiohgwezhqpywvrbgbueebxlmuenrwtgtgmhbaugtywthkppbixxbjowmkqcwiqapxjpirgeeuowpgdkrdskhawgimaaynsbekemlfxoapaskuewaa",
    "MD5OfBody": "2affecad36e208b6be933fce09b7a89e",
    "Body": '{"ResultType": "success", "ItemHandle": "1721.1/131022", "lastModified": "Thu Sep 09 17:56:39 UTC 2021", "Bitstreams": [{"BitstreamName": "10.1002-term.3131.pdf", "BitstreamUUID": "a1b2c3d4e5", "BitstreamChecksum": {"value": "a4e0f4930dfaff904fa3c6c85b0b8ecc", "checkSumAlgorithm": "MD5"}}]}',
    "MD5OfMessageAttributes": "5edd88184e055125bdab8e1b116ecf5e",
    "MessageAttributes": {
        "PackageID": {"StringValue": "10.1002/term.3131", "DataType": "String"},
        "SubmissionSource": {"StringValue": "Submission system", "DataType": "String"},
    },
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't know how I came up with that check and how it survived this long without refactoring but yes or "PackageID" not in sqs_message["MessageAttributes"] is much simpler

or not sqs_message["MessageAttributes"]["PackageID"].get("StringValue")
):
message = f"Failed to parse SQS message attributes: {sqs_message}"
raise InvalidSQSMessageError(message)

@staticmethod
def validate_message_body(sqs_message: MessageTypeDef) -> None:
"""Validate that "Body" field is formatted as expected.

Args:
sqs_message: An SQS message to be evaluated.
"""
if "Body" not in sqs_message or not json.loads(str(sqs_message["Body"])):
message = f"Failed to parse SQS message body: {sqs_message}"
raise InvalidSQSMessageError(message)
Loading
Loading