Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAS-2034 - Remove custom retry logic from HOSS in favour of harmony-service-lib. #4

Merged
merged 4 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## v1.0.1
### 2023-12-19

This version of HOSS removes custom, redundant download retry logic. Instead
retries are relied upon from `harmony-service-lib` and for each stage in a
Harmony workflow.

## v1.0.0
### 2023-10-06

Expand Down
2 changes: 1 addition & 1 deletion docker/service_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.0
1.0.1
1 change: 1 addition & 0 deletions hoss/bbox_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_request_shape_file(message: Message, working_dir: str,
raise UnsupportedShapeFileFormat(message.subset.shape.type)

shape_file_url = message.subset.shape.process('href')
adapter_logger.info('Downloading request shape file')
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Initially I included the path to the shape file, but this is somewhat meaningless as Harmony creates a temporary file, which obfuscates any original user naming convention (the file name is essentially a generated ID).

local_shape_file_path = download(shape_file_url, working_dir,
logger=adapter_logger,
access_token=message.accessToken,
Expand Down
3 changes: 2 additions & 1 deletion hoss/dimension_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@
import numpy as np

from harmony.message import Message
from harmony.message_utility import rgetattr
from harmony.util import Config
from varinfo import VarInfoFromDmr

from hoss.bbox_utilities import flatten_list
from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange
from hoss.utilities import (format_variable_set_string, get_opendap_nc4,
get_value_or_default, rgetattr)
get_value_or_default)


IndexRange = Tuple[int]
Expand Down
11 changes: 0 additions & 11 deletions hoss/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,3 @@ class UrlAccessFailed(CustomError):
def __init__(self, url, status_code):
super().__init__('UrlAccessFailed',
f'{status_code} error retrieving: {url}')


class UrlAccessFailedWithRetries(CustomError):
""" This exception is raised when an HTTP request for a given URL has
failed a specified number of times.

"""
def __init__(self, url):
super().__init__('UrlAccessFailedWithRetries',
f'URL: {url} was unsuccessfully requested the '
'maximum number of times.')
3 changes: 2 additions & 1 deletion hoss/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import List, Set

from harmony.message import Message, Source, Variable as HarmonyVariable
from harmony.message_utility import rgetattr
from harmony.util import Config
from netCDF4 import Dataset
from numpy.ma import masked
Expand All @@ -17,7 +18,7 @@
from hoss.dimension_utilities import (add_index_range, get_fill_slice,
IndexRanges, is_index_subset,
get_requested_index_ranges,
prefetch_dimension_variables, rgetattr)
prefetch_dimension_variables)
from hoss.spatial import get_spatial_index_ranges
from hoss.temporal import get_temporal_index_ranges
from hoss.utilities import (download_url, format_variable_set_string,
Expand Down
75 changes: 21 additions & 54 deletions hoss/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@
from harmony.exceptions import ForbiddenException, ServerException
from harmony.util import Config, download as util_download

from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries


HTTP_REQUEST_ATTEMPTS = 3
from hoss.exceptions import UrlAccessFailed


def get_file_mimetype(file_name: str) -> Tuple[Optional[str], Optional[str]]:
Expand Down Expand Up @@ -90,12 +87,12 @@ def download_url(url: str, destination: str, logger: Logger,
access_token: str = None, config: Config = None,
data=None) -> str:
""" Use built-in Harmony functionality to download from a URL. This is
expected to be used for obtaining the granule `.dmr` and the granule
itself (only the required variables).
expected to be used for obtaining the granule `.dmr`, a prefetch of
only dimensions and bounds variables, and the subsetted granule itself.

OPeNDAP can return intermittent 500 errors. This function will retry
the original request in the event of a 500 error, but not for other
error types. In those instances, the original HTTPError is re-raised.
OPeNDAP can return intermittent 500 errors. Retries will be performed
by inbuilt functionality in the `harmony-service-lib`. The OPeNDAP
errors are captured and re-raised as custom exceptions.

The return value is the location in the file-store of the downloaded
content from the URL.
Expand All @@ -106,32 +103,21 @@ def download_url(url: str, destination: str, logger: Logger,
if data is not None:
logger.info(f'POST request data: "{format_dictionary_string(data)}"')

request_completed = False
attempts = 0

while not request_completed and attempts < HTTP_REQUEST_ATTEMPTS:
attempts += 1

try:
response = util_download(
url,
destination,
logger,
access_token=access_token,
data=data,
cfg=config
)
request_completed = True
except ForbiddenException as harmony_exception:
raise UrlAccessFailed(url, 400) from harmony_exception
except ServerException as harmony_exception:
if attempts < HTTP_REQUEST_ATTEMPTS:
logger.info('500 error returned, retrying request.')
else:
raise UrlAccessFailedWithRetries(url) from harmony_exception
except Exception as harmony_exception:
# Not a 500 error, so raise immediately and exit the loop.
raise UrlAccessFailed(url, 'Unknown') from harmony_exception
try:
response = util_download(
url,
destination,
logger,
access_token=access_token,
data=data,
cfg=config
)
except ForbiddenException as harmony_exception:
raise UrlAccessFailed(url, 400) from harmony_exception
except ServerException as harmony_exception:
raise UrlAccessFailed(url, 500) from harmony_exception
except Exception as harmony_exception:
raise UrlAccessFailed(url, 'Unknown') from harmony_exception

return response

Expand Down Expand Up @@ -159,22 +145,3 @@ def get_value_or_default(value: Optional[float], default: float) -> float:

"""
return value if value is not None else default


def rgetattr(input_object, requested_attribute, *args):
""" This is a recursive version of the inbuilt `getattr` method, such that
it can be called to retrieve nested attributes. For example:
the Message.subset.shape within the input Harmony message.
Comment on lines -162 to -167
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I saw this yesterday browsing the code.


Note, if a default value is specified, this will be returned if any
attribute in the specified chain is absent from the supplied object.

"""
if '.' not in requested_attribute:
result = getattr(input_object, requested_attribute, *args)
else:
attribute_pieces = requested_attribute.split('.')
result = rgetattr(getattr(input_object, attribute_pieces[0], *args),
'.'.join(attribute_pieces[1:]), *args)

return result
2 changes: 1 addition & 1 deletion pip_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This file should contain requirements to be installed via Pip.
# Open source packages available from PyPI
earthdata-varinfo ~= 1.0.0
harmony-service-lib ~= 1.0.23
harmony-service-lib ~= 1.0.25
netCDF4 ~= 1.6.4
numpy ~= 1.24.2
pyproj ~= 3.6.1
Expand Down
87 changes: 17 additions & 70 deletions tests/unit/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@
from harmony.exceptions import ForbiddenException, ServerException
from harmony.util import config

from hoss.exceptions import UrlAccessFailed, UrlAccessFailedWithRetries
from hoss.exceptions import UrlAccessFailed
from hoss.utilities import (download_url, format_dictionary_string,
format_variable_set_string,
get_constraint_expression, get_file_mimetype,
get_opendap_nc4, get_value_or_default,
HTTP_REQUEST_ATTEMPTS, move_downloaded_nc4,
rgetattr)
move_downloaded_nc4)


class TestUtilities(TestCase):
Expand Down Expand Up @@ -42,11 +41,9 @@ def test_get_file_mimetype(self):

@patch('hoss.utilities.util_download')
def test_download_url(self, mock_util_download):
""" Ensure that the `harmony.util.download` function is called. Also
ensure that if a 500 error is returned, the request is retried. If
a different HTTPError occurs, the caught HTTPError should be
re-raised. Finally, check the maximum number of request attempts is
not exceeded.
""" Ensure that the `harmony.util.download` function is called. If an
error occurs, the caught exception should be re-raised with a
custom exception with a human-readable error message.

"""
output_directory = 'output/dir'
Expand Down Expand Up @@ -88,14 +85,22 @@ def test_download_url(self, mock_util_download):
)
mock_util_download.reset_mock()

with self.subTest('500 error triggers a retry.'):
with self.subTest('500 error is caught and handled.'):
mock_util_download.side_effect = [self.harmony_500_error,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This subtest name is incorrect now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. I updated that test name (and also the doc string for the overall test that mentioned retries would be tested)

http_response]

response = download_url(test_url, output_directory, self.logger)
with self.assertRaises(UrlAccessFailed):
download_url(test_url, output_directory, self.logger,
access_token, self.config)

self.assertEqual(response, http_response)
self.assertEqual(mock_util_download.call_count, 2)
mock_util_download.assert_called_once_with(
test_url,
output_directory,
self.logger,
access_token=access_token,
data=None,
cfg=self.config
)
mock_util_download.reset_mock()

with self.subTest('Non-500 error does not retry, and is re-raised.'):
Expand All @@ -116,17 +121,6 @@ def test_download_url(self, mock_util_download):
)
mock_util_download.reset_mock()

with self.subTest('Maximum number of attempts not exceeded.'):
mock_util_download.side_effect = [
self.harmony_500_error
] * (HTTP_REQUEST_ATTEMPTS + 1)
with self.assertRaises(UrlAccessFailedWithRetries):
download_url(test_url, output_directory, self.logger)

self.assertEqual(mock_util_download.call_count,
HTTP_REQUEST_ATTEMPTS)
mock_util_download.reset_mock()

@patch('hoss.utilities.move_downloaded_nc4')
@patch('hoss.utilities.util_download')
def test_get_opendap_nc4(self, mock_download, mock_move_download):
Expand Down Expand Up @@ -259,50 +253,3 @@ def test_get_value_or_default(self):

with self.subTest('Value = None returns the supplied default'):
self.assertEqual(get_value_or_default(None, 20), 20)

def test_rgetattr(self):
""" Ensure that the recursive function can retrieve nested attributes
and uses the default argument when required.

"""
class Child:
def __init__(self, name):
self.name = name

class Parent:
def __init__(self, name, child_name):
self.name = name
self.child = Child(child_name)
self.none = None

test_parent = Parent('parent_name', 'child_name')

with self.subTest('Parent level attribute'):
self.assertEqual(rgetattr(test_parent, 'name'), 'parent_name')

with self.subTest('Nested attribute'):
self.assertEqual(rgetattr(test_parent, 'child.name'), 'child_name')
with self.subTest('Missing parent with default'):
self.assertEqual(rgetattr(test_parent, 'absent', 'default'),
'default')

with self.subTest('Missing child attribute with default'):
self.assertEqual(rgetattr(test_parent, 'child.absent', 'default'),
'default')
with self.subTest('Child requested, parent missing, default'):
self.assertEqual(
rgetattr(test_parent, 'none.something', 'default'),
'default'
)

with self.subTest('Missing parent, with no default'):
with self.assertRaises(AttributeError):
rgetattr(test_parent, 'absent')

with self.subTest('Missing child, with no default'):
with self.assertRaises(AttributeError):
rgetattr(test_parent, 'child.absent')

with self.subTest('Child requested, parent missing, no default'):
with self.assertRaises(AttributeError):
rgetattr(test_parent, 'none.something')