From 0b3c379369e11d5b4883b42eb8a64245a246b2f7 Mon Sep 17 00:00:00 2001 From: Rohan Weeden Date: Tue, 10 Dec 2024 13:19:51 -0500 Subject: [PATCH] Add CmrQuery storage class as a convenience around HttpRequest --- mandible/metadata_mapper/storage/__init__.py | 6 +++ mandible/metadata_mapper/storage/cmr_query.py | 46 +++++++++++++++++++ .../metadata_mapper/storage/http_request.py | 32 ++++++++----- mandible/metadata_mapper/storage/storage.py | 6 +++ tests/test_storage.py | 34 ++++++++++++++ 5 files changed, 112 insertions(+), 12 deletions(-) create mode 100644 mandible/metadata_mapper/storage/cmr_query.py diff --git a/mandible/metadata_mapper/storage/__init__.py b/mandible/metadata_mapper/storage/__init__.py index 198b685..298866a 100644 --- a/mandible/metadata_mapper/storage/__init__.py +++ b/mandible/metadata_mapper/storage/__init__.py @@ -8,6 +8,11 @@ StorageError, ) +try: + from .cmr_query import CmrQuery +except ImportError: + from .storage import CmrQuery + try: from .http_request import HttpRequest except ImportError: @@ -15,6 +20,7 @@ __all__ = ( + "CmrQuery", "Dummy", "FilteredStorage", "HttpRequest", diff --git a/mandible/metadata_mapper/storage/cmr_query.py b/mandible/metadata_mapper/storage/cmr_query.py new file mode 100644 index 0000000..9f80605 --- /dev/null +++ b/mandible/metadata_mapper/storage/cmr_query.py @@ -0,0 +1,46 @@ +import urllib.parse +from dataclasses import InitVar, dataclass +from typing import Optional + +from mandible.metadata_mapper.context import Context + +from .http_request import HttpRequest + + +@dataclass +class CmrQuery(HttpRequest): + """A convenience class for setting necessary CMR parameters""" + + url: InitVar[None] = None + + base_url: str = "" + path: str = "" + format: str = "" + token: Optional[str] = None + + def __post_init__(self, url: str): + if url: + raise ValueError( + "do not set 'url' directly, use 'base_url' and 'path' instead", + ) + + def _get_override_request_args(self, context: Context) -> dict: + return { 
+ "headers": self._get_headers(), + "url": self._get_url(), + } + + def _get_headers(self) -> Optional[dict]: + if self.token is None: + return self.headers + + return { + **(self.headers or {}), + "Authorization": self.token, + } + + def _get_url(self) -> str: + path = self.path + if self.format: + path = f"{self.path}.{self.format.lower()}" + return urllib.parse.urljoin(self.base_url, path) diff --git a/mandible/metadata_mapper/storage/http_request.py b/mandible/metadata_mapper/storage/http_request.py index 9292e65..f67c8a0 100644 --- a/mandible/metadata_mapper/storage/http_request.py +++ b/mandible/metadata_mapper/storage/http_request.py @@ -13,6 +13,8 @@ class HttpRequest(Storage): """A storage which returns the body of an HTTP response""" + # TODO(reweeden): python3.10 added support for KW_ONLY arguments which can + # be used to clean up the inheritance here a bit. url: str method: str = "GET" params: Optional[dict] = None @@ -24,18 +26,21 @@ class HttpRequest(Storage): allow_redirects: bool = True def open_file(self, context: Context) -> IO[bytes]: - response = requests.request( - self.method, - self.url, - params=self.params, - data=self.data, - json=self.json, - headers=self.headers, - cookies=self.cookies, - timeout=self.timeout, - allow_redirects=self.allow_redirects, - stream=True, - ) + kwargs = { + "allow_redirects": self.allow_redirects, + "cookies": self.cookies, + "data": self.data, + "headers": self.headers, + "json": self.json, + "method": self.method, + "params": self.params, + "stream": True, + "timeout": self.timeout, + "url": self.url, + # Allow subclasses to override these + **self._get_override_request_args(context), + } + response = requests.request(**kwargs) # TODO(reweeden): Using response.content causes the entire response # payload to be loaded into memory immediately. 
Ideally, we would @@ -44,3 +49,6 @@ def open_file(self, context: Context) -> IO[bytes]: # object, however, this doesn't preform the content decoding that you # get when using response.content. return io.BytesIO(response.content) + + def _get_override_request_args(self, context: Context) -> dict: + return {} diff --git a/mandible/metadata_mapper/storage/storage.py b/mandible/metadata_mapper/storage/storage.py index 7fb53c3..a6854ae 100644 --- a/mandible/metadata_mapper/storage/storage.py +++ b/mandible/metadata_mapper/storage/storage.py @@ -56,6 +56,12 @@ def __init__(self): super().__init__("requests") +@dataclass +class CmrQuery(_PlaceholderBase): + def __init__(self): + super().__init__("requests") + + # Define storages that don't require extra dependencies @dataclass diff --git a/tests/test_storage.py b/tests/test_storage.py index a5d1f3d..ebd1b92 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -6,6 +6,7 @@ from mandible.metadata_mapper.context import Context from mandible.metadata_mapper.storage import ( STORAGE_REGISTRY, + CmrQuery, Dummy, HttpRequest, LocalFile, @@ -17,6 +18,7 @@ def test_registry(): assert STORAGE_REGISTRY == { + "CmrQuery": CmrQuery, "Dummy": Dummy, "HttpRequest": HttpRequest, "LocalFile": LocalFile, @@ -180,3 +182,35 @@ def create_file(bucket, name, contents=None, type="data"): }) with storage.open_file(context) as f: assert f.read() == b"Content from file2.txt\n" + + +def test_cmr_query_params(): + with pytest.raises(ValueError): + CmrQuery(url="foobar") + + assert CmrQuery( + base_url="http://foo.bar", + path="/search/granules", + )._get_url() == "http://foo.bar/search/granules" + assert CmrQuery( + base_url="http://foo.bar", + path="search/granules", + )._get_url() == "http://foo.bar/search/granules" + assert CmrQuery( + base_url="http://foo.bar/", + path="/search/granules", + )._get_url() == "http://foo.bar/search/granules" + assert CmrQuery( + base_url="http://foo.bar/", + path="search/granules", + )._get_url() == 
"http://foo.bar/search/granules" + + assert CmrQuery( + base_url="http://foo.bar", + path="/search/granules", + format="umm_json", + )._get_url() == "http://foo.bar/search/granules.umm_json" + + assert CmrQuery(token="foobar")._get_headers() == { + "Authorization": "foobar", + }