Skip to content

Commit

Permalink
Merge pull request #443 from nsidc/auth-439
Browse files Browse the repository at this point in the history
fixing searching for restricted datasets and accessing ASF on demand data from Opera
  • Loading branch information
betolink authored Feb 11, 2024
2 parents 5805b14 + d20f713 commit e187ae0
Show file tree
Hide file tree
Showing 7 changed files with 1,078 additions and 966 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## [Unreleased]

* Bug fixes:
* fixed #439 by implementing more trusted domains in the SessionWithHeaderRedirection
* fixed #438 by using an authenticated session for hits()
* Enhancements:
* addressing #427 by adding parameters to collection query

## [v0.8.2] 2023-12-06
* Bug fixes:
* Enable AWS check with IMDSv2
Expand Down Expand Up @@ -167,7 +175,7 @@
- Add basic classes to interact with NASA CMR, EDL and cloud access.
- Basic object formatting.

[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.5.2...HEAD
[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.8.2...HEAD
[v0.5.2]: https://github.com/nsidc/earthaccess/releases/tag/v0.5.2
[v0.5.1]: https://github.com/nsidc/earthaccess/releases/tag/v0.5.1
[v0.5.0]: https://github.com/nsidc/earthaccess/releases/tag/v0.4.0
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ With *earthaccess* we can login, search and download data with a few lines of co

The only requirement to use this library is to open a free account with NASA [EDL](https://urs.earthdata.nasa.gov).

<a href="https://urs.earthdata.nasa.gov"><img src="https://auth.ops.maap-project.org/cas/images/urs-logo.png" /></a>


### **Authentication**

Expand Down
2 changes: 1 addition & 1 deletion binder/environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
dependencies:
# This environment bootstraps poetry, the actual dev environment
# is installed and managed with poetry
- python=3.9
- python=3.10
- jupyterlab=3
- xarray>=0.19
- ipyleaflet>=0.13
Expand Down
30 changes: 23 additions & 7 deletions earthaccess/auth.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import getpass
import importlib.metadata
import logging
import os
from netrc import NetrcParseError
Expand All @@ -11,22 +12,35 @@

from .daac import DAACS

# Build the User-Agent string from the installed earthaccess version; fall back
# to the bare package name when the distribution metadata cannot be found.
try:
user_agent = f"earthaccess v{importlib.metadata.version('earthaccess')}"
except importlib.metadata.PackageNotFoundError:
user_agent = "earthaccess"


logger = logging.getLogger(__name__)


class SessionWithHeaderRedirection(requests.Session):
"""
Requests removes auth headers if the redirect happens outside the
original req domain.
This is taken from https://wiki.earthdata.nasa.gov/display/EL/How+To+Access+Data+With+Python
"""

AUTH_HOST = "urs.earthdata.nasa.gov"
AUTH_HOSTS: List[str] = [
"urs.earthdata.nasa.gov",
"cumulus.asf.alaska.edu",
"sentinel1.asf.alaska.edu",
"nisar.asf.alaska.edu",
"datapool.asf.alaska.edu",
]

def __init__(
self, username: Optional[str] = None, password: Optional[str] = None
) -> None:
super().__init__()
self.headers.update({"User-Agent": user_agent})

if username and password:
self.auth = (username, password)

Expand All @@ -39,11 +53,13 @@ def rebuild_auth(self, prepared_request: Any, response: Any) -> None:
if "Authorization" in headers:
original_parsed = urlparse(response.request.url)
redirect_parsed = urlparse(url)
if (
(original_parsed.hostname != redirect_parsed.hostname)
and redirect_parsed.hostname != self.AUTH_HOST
and original_parsed.hostname != self.AUTH_HOST
if (original_parsed.hostname != redirect_parsed.hostname) and (
redirect_parsed.hostname not in self.AUTH_HOSTS
or original_parsed.hostname not in self.AUTH_HOSTS
):
logger.debug(
f"Deleting Auth Headers: {original_parsed.hostname} -> {redirect_parsed.hostname}"
)
del headers["Authorization"]
return

Expand Down Expand Up @@ -208,7 +224,7 @@ def get_session(self, bearer_token: bool = True) -> requests.Session:
Returns:
class Session instance with Auth and bearer token headers
"""
session = requests.Session()
session = SessionWithHeaderRedirection()
if bearer_token and self.authenticated:
# This will avoid the use of the netrc after we are logged in
session.trust_env = False
Expand Down
44 changes: 43 additions & 1 deletion earthaccess/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,16 @@ def hits(self) -> int:
Returns:
The number of results reported by CMR.
"""
return super().hits()
url = self._build_url()

response = self.session.get(url, headers=self.headers, params={"page_size": 0})

try:
response.raise_for_status()
except exceptions.HTTPError as ex:
raise RuntimeError(ex.response.text)

return int(response.headers["CMR-Hits"])

def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]:
"""Filter by concept ID.
Expand Down Expand Up @@ -107,6 +116,39 @@ def doi(self, doi: str) -> Type[CollectionQuery]:
self.params["doi"] = doi
return self

def instrument(self, instrument: str) -> Type[CollectionQuery]:
"""Search datasets by instrument.
???+ Tip
Not all datasets have an associated instrument. This works
only at the dataset level but not the granule (data) level.
Parameters:
instrument (String): instrument of a dataset, e.g. instrument=GEDI
Returns:
The same query instance (self), with "instrument" stored in its params.
Raises:
TypeError: if `instrument` is not a str.
"""
if not isinstance(instrument, str):
raise TypeError("instrument must be of type str")

self.params["instrument"] = instrument
return self

def project(self, project: str) -> Type[CollectionQuery]:
"""Search datasets by associated project.
???+ Tip
Not all datasets have an associated project. This works
only at the dataset level but not the granule (data) level.
Will return datasets across DAACs matching the project.
Parameters:
project (String): associated project of a dataset, e.g. project=EMIT
Returns:
The same query instance (self), with "project" stored in its params.
Raises:
TypeError: if `project` is not a str.
"""
if not isinstance(project, str):
raise TypeError("project must be of type str")

self.params["project"] = project
return self

def parameters(self, **kwargs: Any) -> Type[CollectionQuery]:
"""Provide query parameters as keyword arguments. The keyword needs to match the name
of the method, and the value should either be the value or a tuple of values.
Expand Down
Loading

0 comments on commit e187ae0

Please sign in to comment.