From a27e8f6130e5fc6bb00c1dd1740977a415408d9e Mon Sep 17 00:00:00 2001
From: Emanuele Giaquinta
Date: Thu, 29 Feb 2024 21:22:38 +0200
Subject: [PATCH 1/2] object_storage: remove ad-hoc retry mechanism for s3 transfers

Boto3 clients have a builtin retry mechanism, see
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
---
 rohmu/object_storage/s3.py | 78 ++++++++++++++++++--------------------
 1 file changed, 36 insertions(+), 42 deletions(-)

diff --git a/rohmu/object_storage/s3.py b/rohmu/object_storage/s3.py
index a9f758aa..44692a99 100644
--- a/rohmu/object_storage/s3.py
+++ b/rohmu/object_storage/s3.py
@@ -402,54 +402,48 @@ def multipart_upload_file_object(
             if not data:
                 break
 
-            attempts = 10
             start_of_part_upload = time.monotonic()
-            while True:
-                attempts -= 1
-                self.stats.operation(StorageOperation.store_file, size=len(data))
+            self.stats.operation(StorageOperation.store_file, size=len(data))
+            try:
+                cup_response = self.s3_client.upload_part(
+                    Body=data,
+                    Bucket=self.bucket_name,
+                    Key=path,
+                    PartNumber=part_number,
+                    UploadId=mp_id,
+                )
+            except botocore.exceptions.ClientError as ex:
+                self.log.exception("Uploading part %d for %s failed", part_number, path)
+                self.stats.operation(StorageOperation.multipart_aborted)
                 try:
-                    cup_response = self.s3_client.upload_part(
-                        Body=data,
+                    self.s3_client.abort_multipart_upload(
                         Bucket=self.bucket_name,
                         Key=path,
-                        PartNumber=part_number,
                         UploadId=mp_id,
                     )
-                except botocore.exceptions.ClientError as ex:
-                    self.log.exception("Uploading part %d for %s failed, attempts left: %d", part_number, path, attempts)
-                    if attempts <= 0:
-                        self.stats.operation(StorageOperation.multipart_aborted)
-                        try:
-                            self.s3_client.abort_multipart_upload(
-                                Bucket=self.bucket_name,
-                                Key=path,
-                                UploadId=mp_id,
-                            )
-                        finally:
-                            err = f"Multipart upload of {path} failed: {ex.__class__.__name__}: {ex}"
-                            raise StorageError(err) from ex
-                    else:
-                        time.sleep(1.0)
-                else:
-                    self.log.info(
-                        "Uploaded part %s of %s, size %s in %.2fs",
-                        part_number,
-                        chunks,
-                        len(data),
-                        time.monotonic() - start_of_part_upload,
-                    )
-                    parts.append(
-                        {
-                            "ETag": cup_response["ETag"],
-                            "PartNumber": part_number,
-                        }
-                    )
-                    part_number += 1
-                    bytes_sent += len(data)
-                    if progress_fn:
-                        # TODO: change this to incremental progress. Size parameter is currently unused.
-                        progress_fn(bytes_sent, size)  # type: ignore[arg-type]
-                    break
+            finally:
+                err = f"Multipart upload of {path} failed: {ex.__class__.__name__}: {ex}"
+                raise StorageError(err) from ex
+            else:
+                self.log.info(
+                    "Uploaded part %s of %s, size %s in %.2fs",
+                    part_number,
+                    chunks,
+                    len(data),
+                    time.monotonic() - start_of_part_upload,
+                )
+                parts.append(
+                    {
+                        "ETag": cup_response["ETag"],
+                        "PartNumber": part_number,
+                    }
+                )
+                part_number += 1
+                bytes_sent += len(data)
+                if progress_fn:
+                    # TODO: change this to incremental progress. Size parameter is currently unused.
+                    progress_fn(bytes_sent, size)  # type: ignore[arg-type]
+                break
 
         self.stats.operation(StorageOperation.multipart_complete)
         try:

From 24d4c8186af89c766d89629c489b3a8e9dd9f0f8 Mon Sep 17 00:00:00 2001
From: Emanuele Giaquinta
Date: Thu, 29 Feb 2024 21:29:44 +0200
Subject: [PATCH 2/2] object_storage: improve retry handling for s3 transfers

This commit changes the Boto3 s3 client to use the standard retry mode, as
it is better than the default legacy one, and increases the maximum number
of attempts, as it is only 3 by default.
---
 rohmu/object_storage/s3.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/rohmu/object_storage/s3.py b/rohmu/object_storage/s3.py
index 44692a99..a735fc52 100644
--- a/rohmu/object_storage/s3.py
+++ b/rohmu/object_storage/s3.py
@@ -162,6 +162,10 @@ def __init__(
             s3={"addressing_style": S3AddressingStyle(addressing_style).value},
             signature_version=signature_version,
             proxies=proxies,
+            retries={
+                "max_attempts": 10,
+                "mode": "standard",
+            },
             **timeouts,
         )
         if not is_verify_tls and cert_path is not None: