diff --git a/.github/actions/install-pkg/action.yml b/.github/actions/install-pkg/action.yml index 5e8bd9e4..4b4e2d92 100644 --- a/.github/actions/install-pkg/action.yml +++ b/.github/actions/install-pkg/action.yml @@ -21,4 +21,4 @@ runs: - name: Install package and test dependencies shell: bash - run: pip install .[test] + run: pip install --root-user-action ignore ".[test]" diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index fba60ccc..b868549d 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -47,6 +47,7 @@ jobs: steps: - name: Fetch user permission + if: github.event_name == 'pull_request_target' id: permission uses: actions-cool/check-user-permission@v2 with: @@ -54,7 +55,11 @@ jobs: username: ${{ github.triggering_actor }} - name: Check user permission - if: ${{ steps.permission.outputs.require-result == 'false' }} + # The name of the output require-result is a bit confusing, but when its value + # is 'false', it means that the triggering actor does NOT have the required + # permission. + if: github.event_name == 'pull_request_target' && steps.permission.outputs.require-result == 'false' + # If the triggering actor does not have write permission (i.e., this is a # PR from a fork), then we exit, otherwise most of the integration tests will # fail because they require access to secrets. 
In this case, a maintainer @@ -78,8 +83,6 @@ jobs: env: EARTHDATA_USERNAME: ${{ secrets.EDL_USERNAME }} EARTHDATA_PASSWORD: ${{ secrets.EDL_PASSWORD }} - EARTHACCESS_TEST_USERNAME: ${{ secrets.EDL_USERNAME }} - EARTHACCESS_TEST_PASSWORD: ${{ secrets.EDL_PASSWORD }} run: ./scripts/integration-test.sh - name: Upload coverage report diff --git a/.github/workflows/test-mindeps.yml b/.github/workflows/test-mindeps.yml index eb07823a..3705c69b 100644 --- a/.github/workflows/test-mindeps.yml +++ b/.github/workflows/test-mindeps.yml @@ -30,7 +30,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version-file: pyproject.toml + python-version: 3.9 - name: Install minimum-compatible dependencies run: uv sync --resolution lowest-direct --extra test diff --git a/.gitignore b/.gitignore index d0ee3ea9..f3567622 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ htmlcov dist site .coverage +.coverage.* coverage.xml .netlify test.db diff --git a/CHANGELOG.md b/CHANGELOG.md index 10703af5..ab69d69c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,56 +1,81 @@ # Changelog -## [v0.11.0] 2024-10-01 +## [Unreleased] + +### Changed + +- Use built-in `assert` statements instead of `unittest` assertions in + integration tests ([#743](https://github.com/nsidc/earthaccess/issues/743)) + (@chuckwondo) + +### Added + +- Add support for `NETRC` environment variable to override default `.netrc` file + location ([#480](https://github.com/nsidc/earthaccess/issues/480)) + (@chuckwondo) +- Add `nox` session for running integration tests locally + ([#815](https://github.com/nsidc/earthaccess/issues/815)) (@chuckwondo) + +### Removed + +### Fixed + +- Integration tests no longer clobber existing `.netrc` file + ([#806](https://github.com/nsidc/earthaccess/issues/806)) + (@chuckwondo) + +## [0.11.0] 2024-10-01 ### Changed - Automatically refresh EDL token and deprecate the `Auth.refresh_tokens` method with no replacement, as there is no longer a need to explicitly 
refresh ([#484](https://github.com/nsidc/earthaccess/issues/484)) - ([**@fwfichtner**](https://github.com/fwfichtner)) + ([@fwfichtner](https://github.com/fwfichtner)) - Deprecate `earthaccess.get_s3fs_session` and `Store.get_s3fs_session`. Use `earthaccess.get_s3_filesystem` and `Store.get_s3_filesystem`, respectively, instead ([#766](https://github.com/nsidc/earthaccess/issues/766)) - ([**@Sherwin-14**](https://github.com/Sherwin-14), - [**@chuckwondo**](https://github.com/chuckwondo)) + ([@Sherwin-14](https://github.com/Sherwin-14), + [@chuckwondo](https://github.com/chuckwondo)) ### Added -- Added Issue Templates - ([#281](https://github.com/nsidc/earthaccess/issues/281)) - ([**@Sherwin-14**](https://github.com/Sherwin-14)) +- Add Issue Templates ([#281](https://github.com/nsidc/earthaccess/issues/281)) + ([@Sherwin-14](https://github.com/Sherwin-14)) - Support Service queries ([#447](https://github.com/nsidc/earthaccess/issues/447)) - ([**@nikki-t**](https://github.com/nikki-t), - [**@chuckwondo**](https://github.com/chuckwondo), - [**@mfisher87**](https://github.com/mfisher87), - [**@betolink**](https://github.com/betolink)) - -- Added example PR links to pull request template + ([@nikki-t](https://github.com/nikki-t), + [@chuckwondo](https://github.com/chuckwondo), + [@mfisher87](https://github.com/mfisher87), + [@betolink](https://github.com/betolink)) +- Add example PR links to pull request template ([#756](https://github.com/nsidc/earthaccess/issues/756)) - [**@Sherwin-14**](https://github.com/Sherwin-14), - [**@mfisher87**](https://github.com/mfisher87) + ([@Sherwin-14](https://github.com/Sherwin-14), + [@mfisher87](https://github.com/mfisher87)) +- Add Contributing Naming Convention document + ([#532](https://github.com/nsidc/earthaccess/issues/532)) + ([@Sherwin-14](https://github.com/Sherwin-14), + [@mfisher87](https://github.com/mfisher87)) -- Added Contributing Naming Convention document - ([#532](https://github.com/nsidc/earthaccess/issues/532)) - 
[**@Sherwin-14**](https://github.com/Sherwin-14), - [**@mfisher87**](https://github.com/mfisher87) +### Removed + +- Remove `binder/` directory, as we no longer need a special + [binder](https://mybinder.org) environment with the top-level + `environment.yml` introduced in + [#733](https://github.com/nsidc/earthaccess/issues/733) + ([@jhkennedy](https://github.com/jhkennedy)) ### Fixed -- Removed Broken Link "Introduction to NASA earthaccess" +- Remove broken link "Introduction to NASA earthaccess" ([#779](https://github.com/nsidc/earthaccess/issues/779)) - ([**@Sherwin-14**](https://github.com/Sherwin-14)) + ([@Sherwin-14](https://github.com/Sherwin-14)) - Restore automation for tidying notebooks used in documentation ([#788](https://github.com/nsidc/earthaccess/issues/788)) - ([**@itcarroll**](https://github.com/itcarroll)) - -### Removed - -- Remove `binder/` directory, as we no longer need a special [binder](https://mybinder.org) - environment with the top-level `environment.yml` introduced in - [#733](https://github.com/nsidc/earthaccess/issues/733) - ([@jhkennedy](https://github.com/jhkennedy)) + ([@itcarroll](https://github.com/itcarroll)) +- Remove the base class on `EarthAccessFile` to fix method resolution + ([#610](https://github.com/nsidc/earthaccess/issues/610)) + ([@itcarroll](https://github.com/itcarroll)) ## [0.10.0] 2024-07-19 @@ -58,55 +83,55 @@ - Perform YAML formatting with `yamlfmt` instead of `prettier` ([#555](https://github.com/nsidc/earthaccess/issues/555)) - ([**@chuckwondo**](https://github.com/chuckwondo), - [**@mfisher87**](https://github.com/mfisher87)) + ([@chuckwondo](https://github.com/chuckwondo), + [@mfisher87](https://github.com/mfisher87)) - Replace `print` calls with `logging` calls where appropriate and add T20 Ruff rule ([#511](https://github.com/nsidc/earthaccess/issues/511)) - ([**@botanical**](https://github.com/botanical), - [**@chuckwondo**](https://github.com/chuckwondo), - 
[**@mfisher87**](https://github.com/mfisher87)) + ([@botanical](https://github.com/botanical), + [@chuckwondo](https://github.com/chuckwondo), + [@mfisher87](https://github.com/mfisher87)) - Update `CHANGELOG.md` to follow Common Changelog conventions ([#584](https://github.com/nsidc/earthaccess/pull/584)) - ([**@danielfromearth**](https://github.com/danielfromearth), - [**@chuckwondo**](https://github.com/chuckwondo), - [**@jhkennedy**](https://github.com/jhkennedy), - [**@mfisher87**](https://github.com/mfisher87)) + ([@danielfromearth](https://github.com/danielfromearth), + [@chuckwondo](https://github.com/chuckwondo), + [@jhkennedy](https://github.com/jhkennedy), + [@mfisher87](https://github.com/mfisher87)) ### Added - Enable queries to Earthdata User Acceptance Testing (UAT) system for authenticated accounts ([#421](https://github.com/nsidc/earthaccess/issues/421)) - ([**@danielfromearth**](https://github.com/danielfromearth), - [**@mfisher87**](https://github.com/mfisher87), - [**@jhkennedy**](https://github.com/jhkennedy), - [**@chuckwondo**](https://github.com/chuckwondo), - [**@betolink**](https://github.com/betolink)) + ([@danielfromearth](https://github.com/danielfromearth), + [@mfisher87](https://github.com/mfisher87), + [@jhkennedy](https://github.com/jhkennedy), + [@chuckwondo](https://github.com/chuckwondo), + [@betolink](https://github.com/betolink)) - Add support for Python 3.12 ([#457](https://github.com/nsidc/earthaccess/issues/457)) - ([**@chuckwondo**](https://github.com/chuckwondo), - [**@mfisher87**](https://github.com/mfisher87)) + ([@chuckwondo](https://github.com/chuckwondo), + [@mfisher87](https://github.com/mfisher87)) - Added documentation for the backwards compatibility ([#471](https://github.com/nsidc/earthaccess/issues/471)) - ([**@Sherwin-14**](https://github.com/Sherwin-14), - [**@mfisher87**](https://github.com/mfisher87)) + ([@Sherwin-14](https://github.com/Sherwin-14), + [@mfisher87](https://github.com/mfisher87)) ### Removed - 
**Breaking:** Remove support for Python 3.8 ([#457](https://github.com/nsidc/earthaccess/issues/457)) - ([**@mfisher87**](https://github.com/mfisher87), - [**@chuckwondo**](https://github.com/chuckwondo)) + ([@mfisher87](https://github.com/mfisher87), + [@chuckwondo](https://github.com/chuckwondo)) - **Breaking:** Remove the `get_user_profile` method and the `email_address` and `profile` attributes from the `Auth` class. Calling the EDL API to get user profile information is not intended for library access and is not necessary for this library's intended use cases. ([#421](https://github.com/nsidc/earthaccess/issues/421)) - ([**@danielfromearth**](https://github.com/danielfromearth), - [**@mfisher87**](https://github.com/mfisher87), - [**@jhkennedy**](https://github.com/jhkennedy), - [**@chuckwondo**](https://github.com/chuckwondo), - [**@betolink**](https://github.com/betolink)) + ([@danielfromearth](https://github.com/danielfromearth), + [@mfisher87](https://github.com/mfisher87), + [@jhkennedy](https://github.com/jhkennedy), + [@chuckwondo](https://github.com/chuckwondo), + [@betolink](https://github.com/betolink)) ### Fixed @@ -114,25 +139,25 @@ [Search After](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html#search-after) for collection and granule searches to support deep-paging through large result sets ([#483](https://github.com/nsidc/earthaccess/issues/483)) - ([**@doug-newman-nasa**](https://github.com/doug-newman-nasa), - [**@chuckwondo**](https://github.com/chuckwondo), - [**@mfisher87**](https://github.com/mfisher87), - [**@betolink**](https://github.com/betolink)) + ([@doug-newman-nasa](https://github.com/doug-newman-nasa), + [@chuckwondo](https://github.com/chuckwondo), + [@mfisher87](https://github.com/mfisher87), + [@betolink](https://github.com/betolink)) - Correct and enhance static type hints for functions and methods that make CMR queries or handle CMR query results ([#508](https://github.com/nsidc/earthaccess/issues/508)) - 
([**@mfisher87**](https://github.com/mfisher87), - [**@jhkennedy**](https://github.com/jhkennedy), - [**@chuckwondo**](https://github.com/chuckwondo), - [**@betolink**](https://github.com/betolink)) + ([@mfisher87](https://github.com/mfisher87), + [@jhkennedy](https://github.com/jhkennedy), + [@chuckwondo](https://github.com/chuckwondo), + [@betolink](https://github.com/betolink)) - Create destination path prior to direct S3 downloads, if it doesn't already exist ([#562](https://github.com/nsidc/earthaccess/issues/562)) - ([**@itcarroll**](https://github.com/itcarroll), - [**@mfisher87**](https://github.com/mfisher87), - [**@chuckwondo**](https://github.com/chuckwondo)) + ([@itcarroll](https://github.com/itcarroll), + [@mfisher87](https://github.com/mfisher87), + [@chuckwondo](https://github.com/chuckwondo)) - Fix broken image link in sea level rise tutorial ([#427](https://github.com/nsidc/earthaccess/issues/427)) - ([**@jbrownrs**](https://github.com/jbrownrs)) + ([@jbrownrs](https://github.com/jbrownrs)) ## [0.9.0] - 2024-02-28 @@ -140,28 +165,28 @@ - Improve search by adding instrument and project to collection queries ([#427](https://github.com/nsidc/earthaccess/issues/427)) - ([**@betolink**](https://github.com/betolink), - [**@mfisher87**](https://github.com/mfisher87), - [**@jhkennedy**](https://github.com/jhkennedy)) + ([@betolink](https://github.com/betolink), + [@mfisher87](https://github.com/mfisher87), + [@jhkennedy](https://github.com/jhkennedy)) - Add user-agent in the request to track usage ([#436](https://github.com/nsidc/earthaccess/issues/436)) - ([**@asteiker**](https://github.com/asteiker), - [**@@mikedorfman**](https://github.com/@mikedorfman), - [**@betolink**](https://github.com/betolink)) + ([@asteiker](https://github.com/asteiker), + [@mikedorfman](https://github.com/mikedorfman), + [@betolink](https://github.com/betolink)) ### Fixed - Implement more trusted domains in the SessionWithRedirection 
([#439](https://github.com/nsidc/earthaccess/issues/439)) - ([**@cmspeed**](https://github.com/cmspeed), - [**@mfisher87**](https://github.com/mfisher87), - [**@betolink**](https://github.com/betolink), - [**@jhkennedy**](https://github.com/jhkennedy)) + ([@cmspeed](https://github.com/cmspeed), + [@mfisher87](https://github.com/mfisher87), + [@betolink](https://github.com/betolink), + [@jhkennedy](https://github.com/jhkennedy)) - Use an authenticated session for hits() instead of calling parent's class super() ([#438](https://github.com/nsidc/earthaccess/issues/438)) - ([**@betolink**](https://github.com/betolink), - [**@mfisher87**](https://github.com/mfisher87), - [**@jhkennedy**](https://github.com/jhkennedy)) + ([@betolink](https://github.com/betolink), + [@mfisher87](https://github.com/mfisher87), + [@jhkennedy](https://github.com/jhkennedy)) ## [0.8.2] - 2023-12-06 @@ -169,43 +194,43 @@ - Update poetry lockfile ([#403](https://github.com/nsidc/earthaccess/pull/403)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) - Use YAML formatting (prettier) ([#398](https://github.com/nsidc/earthaccess/pull/398)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) ### Added - Add CI tests with minimum supported versions ([#402](https://github.com/nsidc/earthaccess/pull/402)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87), - [**@jhkennedy**](https://github.com/jhkennedy)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87), + [@jhkennedy](https://github.com/jhkennedy)) - Add `python-dateutil` as a direct dependency ([#397](https://github.com/nsidc/earthaccess/pull/397)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) ### Removed - Remove binder PR comments ([#400](https://github.com/nsidc/earthaccess/pull/400)) - 
([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) ### Fixed - Enable AWS check with IMDSv2 ([#391](https://github.com/nsidc/earthaccess/pull/391)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87), - [**@itcarroll**](https://github.com/itcarroll)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87), + [@itcarroll](https://github.com/itcarroll)) - Add region to running in AWS check ([#395](https://github.com/nsidc/earthaccess/pull/395)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@betolink**](https://github.com/betolink)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@betolink](https://github.com/betolink)) - Handle opening multi-file granules ([#394](https://github.com/nsidc/earthaccess/pull/394)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@betolink**](https://github.com/betolink)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@betolink](https://github.com/betolink)) ## [0.8.1] - 2023-12-01 @@ -213,29 +238,29 @@ - Handle S3 credential expiration more gracefully ([#354](https://github.com/nsidc/earthaccess/pull/354)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87)) - Use dependabot to update GitHub Actions ([#373](https://github.com/nsidc/earthaccess/pull/373)) - ([**@jhkennedy**](https://github.com/jhkennedy)) + ([@jhkennedy](https://github.com/jhkennedy)) - Consolidate dependabot updates ([#380](https://github.com/nsidc/earthaccess/pull/380)) - ([**@mfisher87**](https://github.com/mfisher87)) + ([@mfisher87](https://github.com/mfisher87)) - Switch to `ruff` for formatting ([#372](https://github.com/nsidc/earthaccess/pull/372)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87)) + 
([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87)) ### Added - Add `kerchunk` metadata consolidation utility ([#278](https://github.com/nsidc/earthaccess/pull/278)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87), - [**@betolink**](https://github.com/betolink), - [**@martindurant**](https://github.com/martindurant), - [**lsterzinger**](https://github.com/lsterzinger), - [**mrocklin**](https://github.com/mrocklin)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87), + [@betolink](https://github.com/betolink), + [@martindurant](https://github.com/martindurant), + [@lsterzinger](https://github.com/lsterzinger), + [@mrocklin](https://github.com/mrocklin)) ## [0.8.0] - 2023-11-29 @@ -243,22 +268,22 @@ - Raise errors instead of `print`ing them in more cases ([#351](https://github.com/nsidc/earthaccess/pull/351)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) - `daac` and `provider` parameters are now normalized to uppercase, since lowercase characters are never valid ([#355](https://github.com/nsidc/earthaccess/pull/355)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87)) - Allow single file URL inputs for `earthaccess.download` ([#347](https://github.com/nsidc/earthaccess/pull/347)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87)) ### Fixed - Fix zero granules being reported for restricted datasets ([#358](https://github.com/nsidc/earthaccess/pull/358)) - ([**@danielfromearth**](https://github.com/danielfromearth)) + ([@danielfromearth](https://github.com/danielfromearth)) ## [0.7.1] - 
2023-11-08 @@ -266,7 +291,7 @@ - Treat granules without `RelatedUrls` as not cloud-hosted ([#339](https://github.com/nsidc/earthaccess/pull/339)) - ([**@mfisher87**](https://github.com/mfisher87)) + ([@mfisher87](https://github.com/mfisher87)) ## [0.7.0] - 2023-10-31 @@ -274,10 +299,10 @@ - `earthaccess.download` now accepts a single granule as input in addition to a list of granules ([#317](https://github.com/nsidc/earthaccess/pull/317)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) - `earthaccess.download` now returns fully qualified local file paths ([#317](https://github.com/nsidc/earthaccess/pull/317)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) ### Added @@ -286,29 +311,28 @@ you have a ``~/.netrc`` file for have set ``EARTHDATA_USERNAME`` and ``EARTHDATA_PASSWORD`` environment variables ([#300](https://github.com/nsidc/earthaccess/pull/300)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87), - [**@mfisher87**](https://github.com/mfisher87)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87)) - Add `earthaccess.auth_environ()` utility for getting Earthdata authentication environment variables ([#316](https://github.com/nsidc/earthaccess/pull/316)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87)) ### Fixed - Fix spelling mistake in `access` variable assignment (`direc` -> `direct`) in `earthaccess.store._get_granules` ([#308](https://github.com/nsidc/earthaccess/pull/308)) - ([**@trey-stafford**](https://github.com/trey-stafford)) + ([@trey-stafford](https://github.com/trey-stafford)) - Pass `threads` arg to `_open_urls_https` in `earthaccess.store._open_urls`, replacing the hard-coded value of 8 
([#308](https://github.com/nsidc/earthaccess/pull/308)) - ([**@trey-stafford**](https://github.com/trey-stafford)) + ([@trey-stafford](https://github.com/trey-stafford)) - Return S3 data links by default when in region ([#318](https://github.com/nsidc/earthaccess/pull/318)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mfisher87**](https://github.com/mfisher87), - [**@jhkennedy**](https://github.com/jhkennedy)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mfisher87](https://github.com/mfisher87), + [@jhkennedy](https://github.com/jhkennedy)) ## [0.6.0] - 2023-09-20 @@ -316,19 +340,19 @@ - `earthaccess.get_s3fs_session()` can use the results to find the right set of S3 credentials ([#296](https://github.com/nsidc/earthaccess/pull/296)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Fixed - `earthaccess.search_datasets()` and `earthaccess.search_data()` can find restricted datasets ([#296](https://github.com/nsidc/earthaccess/pull/296)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Fix distributed serialization for EarthAccessFile ([#301](https://github.com/nsidc/earthaccess/pull/301)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) and + ([@jrbourbeau](https://github.com/jrbourbeau)) and ([#276](https://github.com/nsidc/earthaccess/pull/276)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@betolink**](https://github.com/betolink)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@betolink](https://github.com/betolink)) ## [0.5.3] - 2023-08-01 @@ -336,36 +360,36 @@ - Add download from onprem how-to ([#265](https://github.com/nsidc/earthaccess/pull/265)) - ([**@andypbarrett**](https://github.com/andypbarrett)) + ([@andypbarrett](https://github.com/andypbarrett)) ### Changed - For CI, integration tests are now only run when we push to main (after a merge) ([#267](https://github.com/nsidc/earthaccess/pull/267)) - 
([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - For CI, unit tests are run for any branch and opened PR ([#267](https://github.com/nsidc/earthaccess/pull/267)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Update example in `search_datasets` ([#247](https://github.com/nsidc/earthaccess/pull/247)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) - Improve remote cluster performance ([#259](https://github.com/nsidc/earthaccess/pull/259)) - ([**@jrbourbeau**](https://github.com/jrbourbeau), - [**@mrocklin**](https://github.com/mrocklin), - [**@mfisher87**](https://github.com/mfisher87)) + ([@jrbourbeau](https://github.com/jrbourbeau), + [@mrocklin](https://github.com/mrocklin), + [@mfisher87](https://github.com/mfisher87)) - Return useful error message for failed download ([#263](https://github.com/nsidc/earthaccess/pull/263)) - ([**@andypbarrett**](https://github.com/andypbarrett)) + ([@andypbarrett](https://github.com/andypbarrett)) ### Fixed - Granule's size() returned zero ([#267](https://github.com/nsidc/earthaccess/pull/267)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Add exception handling for fsspec sessions, thanks to @jrbourbeau ([#249](https://github.com/nsidc/earthaccess/pull/249)) - ([**@jrbourbeau**](https://github.com/jrbourbeau)) + ([@jrbourbeau](https://github.com/jrbourbeau)) ## [0.5.2] - 2023-04-21 @@ -374,14 +398,14 @@ - Remove Benedict (core dependency) as the default dict for JSON coming from CMR ([#229](https://github.com/nsidc/earthaccess/pull/229), [#230](https://github.com/nsidc/earthaccess/issues/230)) - ([**@psarka**](https://github.com/psarka)) + ([@psarka](https://github.com/psarka)) ### Fixed - S3 credentials endpoints are tried with tokens and basic auth until all the DAACs accept the same auth 
([#234](https://github.com/nsidc/earthaccess/pull/234)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ## [0.5.1] - 2023-03-20 @@ -389,41 +413,41 @@ - For CI, documentation for readthedocs fixed by including poetry as the default tool ([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - For CI, injected new secrets to test Auth using the icepyx convention (EARTHDATA_USERNAME) ([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@JessicaS11**](https://github.com/JessicaS11), - [**@betolink**](https://github.com/betolink)) + ([@JessicaS11](https://github.com/JessicaS11), + [@betolink](https://github.com/betolink)) ### Added - Add ability to get the user's profile with auth.user_profile which includes the user email ([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Add LAAD as a supported DAAC ([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Removed - Remove magic from dependencies (not available in windows and not used but just in tests) ([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Fixed - `get_s3_credentials()` only worked when a netrc file was present, bug reported by @scottyhq and @JessicaS11 ([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@betolink**](https://github.com/betolink), - [**@JessicaS11**](https://github.com/JessicaS11), - [**@scottyhq**](https://github.com/scottyhq)) + ([@betolink](https://github.com/betolink), + [@JessicaS11](https://github.com/JessicaS11), + [@scottyhq](https://github.com/scottyhq)) - Include tests for all DAAC S3 endpoints 
([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Update notebooks to use the new top level API ([#214](https://github.com/nsidc/earthaccess/pull/214)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ## [0.5.0] - 2023-02-23 @@ -431,37 +455,37 @@ - For CI, documentation is now only built for the main, dev and documentation branches ([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - For CI, notebooks are executed every time the documentation gets published! ([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@betolink**](https://github.com/betolink), - [**@asteiker**](https://github.com/asteiker)) + ([@betolink](https://github.com/betolink), + [@asteiker](https://github.com/asteiker)) ### Added - Add ability to use the top level API to get S3 credentials, authenticated fsspec and requests sessions! 
([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Make available ASF direct access for Sentinel1 products ([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Fixed - Fix a bug where the Auth class is invoked without the proper parameters ([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@JessicaS11**](https://github.com/JessicaS11)) + ([@JessicaS11](https://github.com/JessicaS11)) - Raise and exception if a user specifies the netrc strategy and there is no netrc ([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - S3 URLs broke the Store class when opened outside AWS ([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Opening files using URLs was not working properly on AWS, thanks to @amfriesz for reporting it! 
([#202](https://github.com/nsidc/earthaccess/pull/202)) - ([**@betolink**](https://github.com/betolink), - [**@amfriesz**](https://github.com/amfriesz)) + ([@betolink](https://github.com/betolink), + [@amfriesz](https://github.com/amfriesz)) ## [0.4.7] - 2022-12-11 @@ -469,13 +493,13 @@ - Fix open() for direct access ([#186](https://github.com/nsidc/earthaccess/pull/186)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Move python-magic to the dev section because it is a test dependency ([#186](https://github.com/nsidc/earthaccess/pull/186)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Make minor edits in the README ([#186](https://github.com/nsidc/earthaccess/pull/186)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ## [0.4.6] - 2022-12-08 @@ -483,16 +507,16 @@ - For CI, only run the publish workflow after a release on GitHub ([#183](https://github.com/nsidc/earthaccess/pull/183)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Added - Add feature to search collections by DOI ([#183](https://github.com/nsidc/earthaccess/pull/183)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Add new API documentation and simplify notation to access data ([#183](https://github.com/nsidc/earthaccess/pull/183)) - ([**@jroebuck932**](https://github.com/jroebuck932)) + ([@jroebuck932](https://github.com/jroebuck932)) ## [0.4.1] - 2022-11-02 @@ -500,25 +524,25 @@ - For CI, install Poetry using the new script ([#131](https://github.com/nsidc/earthaccess/pull/131)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - For CI, change dependabot alerts to monthly ([#131](https://github.com/nsidc/earthaccess/pull/131)) - ([**@betolink**](https://github.com/betolink)) + 
([@betolink](https://github.com/betolink)) - Improve documentation by reimplementing python_cmr methods for docstring compatibility ([#131](https://github.com/nsidc/earthaccess/pull/131)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Use `CMR-Search-After` ([#145](https://github.com/nsidc/earthaccess/issues/145)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Added - Add GES_DISC S3 endpoint ([#131](https://github.com/nsidc/earthaccess/pull/131)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Improve documentation by adding types to method signatures ([#131](https://github.com/nsidc/earthaccess/pull/131)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ## [0.4.0] - 2022-08-17 @@ -526,16 +550,16 @@ - Add store, auth to docs and update mkdocs config ([#119](https://github.com/nsidc/earthaccess/pull/119)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - For `auth`, add the ability to persist credentials into a `.netrc` file ([#119](https://github.com/nsidc/earthaccess/pull/119)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - For `store`, use fsspec s3fs for in cloud access and https sessions for out of region access ([#43](https://github.com/nsidc/earthaccess/issues/43)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - For `store`, can open files with fsspec in and out of region (stream into xarray) ([#41](https://github.com/nsidc/earthaccess/issues/41)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ## [0.3.0] - 2022-04-28 @@ -543,33 +567,33 @@ - Update python-cmr to NASA fork ([#75](https://github.com/nsidc/earthaccess/pull/75)) - ([**@jhkennedy**](https://github.com/jhkennedy)) + 
([@jhkennedy](https://github.com/jhkennedy)) - Drop unused `pydantic` dependency ([`5761548`](https://github.com/nsidc/earthaccess/pull/75/commits/5761548fcd8ba8733ce4f5ff9b8ce7967c3a8398)) - ([**@jhkennedy**](https://github.com/jhkennedy)) + ([@jhkennedy](https://github.com/jhkennedy)) - Auth can refresh CMR tokens ([#82](https://github.com/nsidc/earthaccess/pull/82)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Verify git tag and poetry version are the same before publishing to PyPI ### Added - Add documentation for readthedocs and GitHub ([#82](https://github.com/nsidc/earthaccess/pull/82)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Removed - **Breaking**: Drop python 3.7 support ([#82](https://github.com/nsidc/earthaccess/pull/82)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ### Fixed - Fix bug with CMR tokens - Add missing `python-datutil` dependency ([`747e992`](https://github.com/nsidc/earthaccess/pull/75/commits/747e9926a5ab83d75bbf7f17d4c52f24b563147b)) - ([**@jhkennedy**](https://github.com/jhkennedy)) + ([@jhkennedy](https://github.com/jhkennedy)) ## [0.2.2] - 2022-03-23 @@ -577,9 +601,9 @@ - Fix store to download multi-file granules ([#73](https://github.com/nsidc/earthaccess/pull/73)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Fix granule formatting ([#73](https://github.com/nsidc/earthaccess/pull/73)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ## [0.2.1] - 2022-03-19 @@ -587,13 +611,13 @@ - Rename Accessor to Store ([`4bd618d`](https://github.com/nsidc/earthaccess/pull/66/commits/4bd618d4d48c3cd256a077fb8329f40df2d5b7ff)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Relax dependency requirements 
([`c9a5ed6`](https://github.com/nsidc/earthaccess/pull/66/commits/c9a5ed6b917435e7c4ece58485939065fa71cc8f)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) - Store can download plain links if they are on prem ([`92d2919`](https://github.com/nsidc/earthaccess/commit/92d291962e5b72b458c2971eae8a6b813d4bae39)) - ([**@betolink**](https://github.com/betolink)) + ([@betolink](https://github.com/betolink)) ## [0.1.0-beta.1] - 2021-09-21 @@ -604,7 +628,8 @@ _Conception!_ - Add basic classes to interact with NASA CMR, EDL and cloud access. - Basic object formatting. -[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.10.0...HEAD +[Unreleased]: https://github.com/nsidc/earthaccess/compare/v0.11.0...HEAD +[0.11.0]: https://github.com/nsidc/earthaccess/releases/tag/v0.11.0 [0.10.0]: https://github.com/nsidc/earthaccess/releases/tag/v0.10.0 [0.9.0]: https://github.com/nsidc/earthaccess/releases/tag/v0.9.0 [0.8.2]: https://github.com/nsidc/earthaccess/releases/tag/v0.8.2 diff --git a/docs/contributing/development.md b/docs/contributing/development.md index bbccecd6..f97ae682 100644 --- a/docs/contributing/development.md +++ b/docs/contributing/development.md @@ -20,20 +20,29 @@ If you don't have pipx (pip for applications), then you can install with pip is reasonable). If you use macOS, then pipx and nox are both in brew, use `brew install pipx nox`. -To use, run `nox`. This will typecheck and test using every installed version of -Python on your system, skipping ones that are not installed. You can also run -specific jobs: +To use, run `nox` without any arguments. This will run type checks and unit +tests using the installed version of Python on your system. 
+ +You can also run individual tasks (_sessions_ in `nox` parlance, hence the `-s` +option below), like so: ```console -$ nox -s typecheck # Typecheck only -$ nox -s tests # Python tests -$ nox -s serve_docs # Build and serve the docs -$ nox -s build_pkg # Make an SDist and Wheel +nox -s typecheck # Run typechecks +nox -s tests # Run unit tests +nox -s integration-tests # Run integration tests (see note below) +nox -s serve_docs # Build and serve the docs +nox -s build_pkg # Build an SDist and Wheel ``` Nox handles everything for you, including setting up a temporary virtual environment for each run. +**NOTE:** In order to run integration tests locally, you must set the +environment variables `EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD` to your +username and password, respectively, of your +[NASA Earthdata](https://urs.earthdata.nasa.gov/) account (registration is +free). + ## Manual development environment setup While `nox` is the fastest way to get started, you will likely need a full diff --git a/docs/howto/authenticate.md b/docs/howto/authenticate.md index 407e5140..ec94f5ea 100644 --- a/docs/howto/authenticate.md +++ b/docs/howto/authenticate.md @@ -1,10 +1,14 @@ -## Authenticate with Earthdata Login +# Authenticate with Earthdata Login -The first step to use NASA Earthdata is to create an account with Earthdata Login, please follow the instructions at [NASA EDL](https://urs.earthdata.nasa.gov/) +The first step to use NASA Earthdata is to create an account with Earthdata +Login, please follow the instructions at +[NASA EDL](https://urs.earthdata.nasa.gov/) -Once registered, earthaccess can use environment variables, a `.netrc` file or interactive input from a user to login with NASA EDL. +Once registered, earthaccess can use environment variables, a `.netrc` file or +interactive input from a user to login with NASA EDL. -If a strategy is not especified, env vars will be used first, then netrc and finally user's input. 
+If a strategy is not specified, environment variables will be used first, then +a `.netrc` (if found, see below), and finally a user's input. ```py import earthaccess @@ -12,35 +16,48 @@ import earthaccess auth = earthaccess.login() ``` -If you have a .netrc file with your Earthdata Login credentials +If you have a `.netrc` file (see below) with your Earthdata Login credentials, +you can explicitly specify its use: ```py auth = earthaccess.login(strategy="netrc") ``` -If your Earthdata Login credentials are set as environment variables: EARTHDATA_USERNAME, EARTHDATA_PASSWORD +If your Earthdata Login credentials are set as the environment variables +`EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD`, you can explicitly specify their +use: ```py auth = earthaccess.login(strategy="environment") ``` -If you wish to enter your Earthdata Login credentials when prompted with optional persistence to .netrc +If you wish to enter your Earthdata Login credentials when prompted, with +optional persistence to your `.netrc` file (see below), specify the interactive +strategy: ```py auth = earthaccess.login(strategy="interactive", persist=True) ``` +## Authentication +By default, `earthaccess` will automatically look for your EDL account +credentials in two locations: -### **Authentication** +1. A `.netrc` file: By default, this is either `~/_netrc` (on a Windows system) + or `~/.netrc` (on a non-Windows system). On *any* system, you may override + the default location by setting the `NETRC` environment variable to the path + of your desired `.netrc` file. -By default, `earthaccess` with automatically look for your EDL account credentials in two locations: - -1. A `~/.netrc` file + **NOTE**: When setting the `NETRC` environment variable, there is no + requirement to use a specific filename. The name `.netrc` is common, but + used throughout documentation primarily for convenience.
The only + requirement is that the *contents* of the file adhere to the + [`.netrc` file format](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). 2. `EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD` environment variables -If neither of these options are configured, you can authenticate by calling the `earthaccess.login()` method -and manually entering your EDL account credentials. +If neither of these options are configured, you can authenticate by calling the +`earthaccess.login()` method and manually entering your EDL account credentials. ```python import earthaccess @@ -48,27 +65,26 @@ import earthaccess earthaccess.login() ``` -Note you can pass `persist=True` to `earthaccess.login()` to have the EDL account credentials you enter -automatically saved to a `~/.netrc` file for future use. - +Note you can pass `persist=True` to `earthaccess.login()` to have the EDL +account credentials you enter automatically saved to your `.netrc` file (see +above) for future use. Once you are authenticated with NASA EDL you can: * Get a file from a DAAC using a `fsspec` session. -* Request temporary S3 credentials from a particular DAAC (needed to download or stream data from an S3 bucket in the cloud). +* Request temporary S3 credentials from a particular DAAC (needed to download or + stream data from an S3 bucket in the cloud). * Use the library to download or stream data directly from S3. * Regenerate CMR tokens (used for restricted datasets). 
+## Earthdata User Acceptance Testing (UAT) environment -### Earthdata User Acceptance Testing (UAT) environment - -If your EDL account is authorized to access the User Acceptance Testing (UAT) system, -you can set earthaccess to work with its EDL and CMR endpoints -by setting the `system` argument at login, as follows: +If your EDL account is authorized to access the User Acceptance Testing (UAT) +system, you can set earthaccess to work with its EDL and CMR endpoints by +setting the `system` argument at login, as follows: ```python import earthaccess earthaccess.login(system=earthaccess.UAT) - ``` diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index 6d7d0def..73f7ed2d 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -1,6 +1,7 @@ import logging import threading from importlib.metadata import version +from typing import Optional from .api import ( auth_environ, @@ -21,7 +22,7 @@ ) from .auth import Auth from .kerchunk import consolidate_metadata -from .search import DataCollections, DataGranules +from .search import DataCollection, DataCollections, DataGranule, DataGranules from .services import DataServices from .store import Store from .system import PROD, UAT @@ -46,7 +47,9 @@ "download", "auth_environ", # search.py + "DataGranule", "DataGranules", + "DataCollection", "DataCollections", "DataServices", # auth.py @@ -62,7 +65,7 @@ __version__ = version("earthaccess") _auth = Auth() -_store = None +_store: Optional[Store] = None _lock = threading.Lock() diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 4332379a..3a3b209c 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -25,6 +25,24 @@ logger = logging.getLogger(__name__) +def netrc_path() -> Path: + """Return the path of the `.netrc` file. + + The path may or may not exist. + + See [the `.netrc` file](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). 
+ + Returns: + `Path` of the `NETRC` environment variable, if the value is non-empty; + otherwise, the path of the platform-specific default location: + `~/_netrc` on Windows systems, `~/.netrc` on non-Windows systems. + """ + sys_netrc_name = "_netrc" if platform.system() == "Windows" else ".netrc" + env_netrc = os.environ.get("NETRC") + + return Path(env_netrc) if env_netrc else Path.home() / sys_netrc_name + + class SessionWithHeaderRedirection(requests.Session): """Requests removes auth headers if the redirect happens outside the original req domain. @@ -104,11 +122,12 @@ def login( if self.authenticated and (system == self.system): logger.debug("We are already authenticated with NASA EDL") return self + if strategy == "interactive": self._interactive(persist) - if strategy == "netrc": + elif strategy == "netrc": self._netrc() - if strategy == "environment": + elif strategy == "environment": self._environment() return self @@ -222,25 +241,29 @@ def _interactive(self, persist_credentials: bool = False) -> bool: if authenticated: logger.debug("Using user provided credentials for EDL") if persist_credentials: - logger.info("Persisting credentials to .netrc") self._persist_user_credentials(username, password) return authenticated def _netrc(self) -> bool: + netrc_loc = netrc_path() + try: - my_netrc = Netrc() + my_netrc = Netrc(str(netrc_loc)) except FileNotFoundError as err: - raise FileNotFoundError(f"No .netrc found in {Path.home()}") from err + raise FileNotFoundError(f"No .netrc found at {netrc_loc}") from err except NetrcParseError as err: - raise NetrcParseError("Unable to parse .netrc") from err + raise NetrcParseError(f"Unable to parse .netrc file {netrc_loc}") from err + if (creds := my_netrc[self.system.edl_hostname]) is None: return False username = creds["login"] password = creds["password"] authenticated = self._get_credentials(username, password) + if authenticated: logger.debug("Using .netrc file for EDL") + return authenticated def 
_environment(self) -> bool: @@ -293,33 +316,41 @@ def _find_or_create_token(self, username: str, password: str) -> Any: def _persist_user_credentials(self, username: str, password: str) -> bool: # See: https://github.com/sloria/tinynetrc/issues/34 + + netrc_loc = netrc_path() + logger.info(f"Persisting credentials to {netrc_loc}") + try: - netrc_path = Path().home().joinpath(".netrc") - netrc_path.touch(exist_ok=True) - netrc_path.chmod(0o600) + netrc_loc.touch(exist_ok=True) + netrc_loc.chmod(0o600) except Exception as e: logger.error(e) return False - my_netrc = Netrc(str(netrc_path)) + + my_netrc = Netrc(str(netrc_loc)) my_netrc[self.system.edl_hostname] = { "login": username, "password": password, } my_netrc.save() + urs_cookies_path = Path.home() / ".urs_cookies" + if not urs_cookies_path.exists(): urs_cookies_path.write_text("") # Create and write to .dodsrc file dodsrc_path = Path.home() / ".dodsrc" + if not dodsrc_path.exists(): dodsrc_contents = ( - f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_path}" + f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_loc}" ) dodsrc_path.write_text(dodsrc_contents) if platform.system() == "Windows": local_dodsrc_path = Path.cwd() / dodsrc_path.name + if not local_dodsrc_path.exists(): shutil.copy2(dodsrc_path, local_dodsrc_path) diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py index 26758184..9ee40dec 100644 --- a/earthaccess/kerchunk.py +++ b/earthaccess/kerchunk.py @@ -1,34 +1,39 @@ from __future__ import annotations +from typing import Optional, Union + import fsspec +import fsspec.utils import s3fs import earthaccess def _get_chunk_metadata( - granule: earthaccess.results.DataGranule, - fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem, + granule: earthaccess.DataGranule, + fs: fsspec.AbstractFileSystem, ) -> list[dict]: from kerchunk.hdf import SingleHdf5ToZarr metadata = [] access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect" + for url in 
granule.data_links(access=access): with fs.open(url) as inf: h5chunks = SingleHdf5ToZarr(inf, url) m = h5chunks.translate() metadata.append(m) + return metadata def consolidate_metadata( - granules: list[earthaccess.results.DataGranule], - kerchunk_options: dict | None = None, + granules: list[earthaccess.DataGranule], + kerchunk_options: Optional[dict] = None, access: str = "direct", - outfile: str | None = None, - storage_options: dict | None = None, -) -> str | dict: + outfile: Optional[str] = None, + storage_options: Optional[dict] = None, +) -> Union[str, dict]: try: import dask @@ -44,15 +49,16 @@ def consolidate_metadata( fs = earthaccess.get_fsspec_https_session() # Get metadata for each granule - get_chunk_metadata = dask.delayed(_get_chunk_metadata) - chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) + get_chunk_metadata = dask.delayed(_get_chunk_metadata) # type: ignore + chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) # type: ignore chunks = sum(chunks, start=[]) # Get combined metadata object mzz = MultiZarrToZarr(chunks, **(kerchunk_options or {})) - if outfile is not None: - output = fsspec.utils.stringify_path(outfile) - mzz.translate(outfile, storage_options=storage_options or {}) - return output - else: + + if outfile is None: return mzz.translate() + + output = fsspec.utils.stringify_path(outfile) + mzz.translate(outfile, storage_options=storage_options or {}) + return output diff --git a/earthaccess/store.py b/earthaccess/store.py index 31f09a44..fd399d44 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -63,11 +63,11 @@ def __repr__(self) -> str: def _open_files( url_mapping: Mapping[str, Union[DataGranule, None]], fs: fsspec.AbstractFileSystem, - threads: Optional[int] = 8, + threads: int = 8, ) -> List[fsspec.spec.AbstractBufferedFile]: - def multi_thread_open(data: tuple) -> EarthAccessFile: - urls, granule = data - return EarthAccessFile(fs.open(urls), granule) + def 
multi_thread_open(data: tuple[str, Optional[DataGranule]]) -> EarthAccessFile: + url, granule = data + return EarthAccessFile(fs.open(url), granule) # type: ignore fileset = pqdm(url_mapping.items(), multi_thread_open, n_jobs=threads) return fileset @@ -365,7 +365,7 @@ def _open_granules( self, granules: List[DataGranule], provider: Optional[str] = None, - threads: Optional[int] = 8, + threads: int = 8, ) -> List[Any]: fileset: List = [] total_size = round(sum([granule.size() for granule in granules]) / 1024, 2) @@ -419,7 +419,7 @@ def _open_urls( self, granules: List[str], provider: Optional[str] = None, - threads: Optional[int] = 8, + threads: int = 8, ) -> List[Any]: fileset: List = [] @@ -685,14 +685,14 @@ def _download_onprem_granules( def _open_urls_https( self, url_mapping: Mapping[str, Union[DataGranule, None]], - threads: Optional[int] = 8, + threads: int = 8, ) -> List[fsspec.AbstractFileSystem]: https_fs = self.get_fsspec_session() - if https_fs is not None: - try: - fileset = _open_files(url_mapping, https_fs, threads) - except Exception: - logger.exception( - "An exception occurred while trying to access remote files via HTTPS" - ) - return fileset + + try: + return _open_files(url_mapping, https_fs, threads) + except Exception: + logger.exception( + "An exception occurred while trying to access remote files via HTTPS" + ) + raise diff --git a/noxfile.py b/noxfile.py index 858bedc9..f9d42ba1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import shutil from pathlib import Path @@ -26,6 +27,21 @@ def tests(session: nox.Session) -> None: session.run("pytest", "tests/unit", *session.posargs) +@nox.session(name="integration-tests") +def integration_tests(session: nox.Session) -> None: + """Run the integration tests.""" + session.install("--editable", ".[test]") + session.run( + "scripts/integration-test.sh", + *session.posargs, + env=dict( + EARTHDATA_USERNAME=os.environ["EARTHDATA_USERNAME"], + 
EARTHDATA_PASSWORD=os.environ["EARTHDATA_PASSWORD"], + ), + external=True, + ) + + @nox.session def build_pkg(session: nox.Session) -> None: """Build a source distribution and binary distribution (wheel).""" diff --git a/pyproject.toml b/pyproject.toml index c6fb6800..02e7e4d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,6 @@ build-backend = "hatchling.build" [project] name = "earthaccess" version = "0.11.0" -repository = "https://github.com/nsidc/earthaccess" -documentation = "https://earthaccess.readthedocs.io" description = "Client library for NASA Earthdata APIs" authors = [ {name = "earthaccess contributors"} @@ -49,16 +47,24 @@ dependencies = [ "multimethod >=1.8", "importlib-resources >=6.3.2", "typing_extensions >=4.10.0", - # kerchunk requires numpy, but numpy >=1.26.0 is required for Python 3.12 - # support - "numpy >=1.24.0; python_version < '3.12'", - "numpy >=1.26.0; python_version >= '3.12'", ] +[project.urls] +Repository = "https://github.com/nsidc/earthaccess" +Documentation = "https://earthaccess.readthedocs.io" +"Bug Tracker" = "https://github.com/nsidc/earthaccess/issues" +Changelog = "https://github.com/nsidc/earthaccess/blob/main/CHANGELOG.md" + [project.optional-dependencies] kerchunk = [ "kerchunk", "dask", + "h5py >=3.0", + "h5netcdf", + "xarray", + # kerchunk requires numpy, but numpy >=1.26.0 is required for Python 3.12 + "numpy >=1.24.0; python_version < '3.12'", + "numpy >=1.26.0; python_version >= '3.12'", ] dev = [ "bump-my-version >=0.10.0", @@ -77,6 +83,7 @@ test = [ "types-requests >=0.1", "types-setuptools >=0.1", "vcrpy >=6.0.1", + "earthaccess[kerchunk]", ] docs = [ "jupyterlab >=3", diff --git a/scripts/integration-test.sh b/scripts/integration-test.sh index 15b173f8..506976ad 100755 --- a/scripts/integration-test.sh +++ b/scripts/integration-test.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -x -pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing ${@} --capture=no --tb=native 
--log-cli-level=INFO +pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing "${@}" --capture=no --tb=native --log-cli-level=INFO RET=$? set +x diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c2d4a3c2..8c206885 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,7 @@ +import os +import pathlib + +import earthaccess import pytest ACCEPTABLE_FAILURE_RATE = 10 @@ -29,3 +33,31 @@ def pytest_sessionfinish(session, exitstatus): failure_rate = (100.0 * session.testsfailed) / session.testscollected if failure_rate <= ACCEPTABLE_FAILURE_RATE: session.exitstatus = 99 + + +@pytest.fixture +def mock_missing_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc_path = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc_path)) + monkeypatch.delenv("EARTHDATA_USERNAME") + monkeypatch.delenv("EARTHDATA_PASSWORD") + # Currently, due to there being only a single, global, module-level auth + # value, tests using different auth strategies interfere with each other, + # so here we are monkeypatching a new, unauthenticated Auth object. 
+ auth = earthaccess.Auth() + monkeypatch.setattr(earthaccess, "_auth", auth) + monkeypatch.setattr(earthaccess, "__auth__", auth) + + +@pytest.fixture +def mock_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc)) + + username = os.environ["EARTHDATA_USERNAME"] + password = os.environ["EARTHDATA_PASSWORD"] + + netrc.write_text( + f"machine urs.earthdata.nasa.gov login {username} password {password}\n" + ) + netrc.chmod(0o600) diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 8fd45489..f0fdd219 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -1,21 +1,11 @@ -# package imports import logging import os -import unittest from pathlib import Path import earthaccess import pytest logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") dataset_valid_params = [ @@ -44,29 +34,29 @@ def test_auth_returns_valid_auth_class(): auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated def test_dataset_search_returns_none_with_no_parameters(): results = earthaccess.search_datasets() - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) == 0) + assert isinstance(results, list) + assert len(results) == 0 @pytest.mark.parametrize("kwargs", dataset_valid_params) def test_dataset_search_returns_valid_results(kwargs): 
results = earthaccess.search_datasets(**kwargs) - assertions.assertIsInstance(results, list) - assertions.assertIsInstance(results[0], dict) + assert isinstance(results, list) + assert isinstance(results[0], dict) @pytest.mark.parametrize("kwargs", granules_valid_params) def test_granules_search_returns_valid_results(kwargs): results = earthaccess.search_data(count=10, **kwargs) - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) <= 10) + assert isinstance(results, list) + assert len(results) <= 10 @pytest.mark.parametrize("selection", [0, slice(None)]) @@ -80,15 +70,15 @@ def test_download(tmp_path, selection, use_url): ) if use_url: # Download via file URL string instead of DataGranule object - results = [r.data_links(access="indirect") for r in results] - results = sum(results, start=[]) # flatten to a list of strings + results = [link for r in results for link in r.data_links(access="indirect")] result = results[selection] files = earthaccess.download(result, str(tmp_path)) - assertions.assertIsInstance(files, list) + assert isinstance(files, list) assert all(Path(f).exists() for f in files) def test_auth_environ(): + earthaccess.login(strategy="environment") environ = earthaccess.auth_environ() assert environ == { "EARTHDATA_USERNAME": os.environ["EARTHDATA_USERNAME"], diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index 7c0c1b37..1a83833d 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -1,90 +1,54 @@ -# package imports import logging -import os -import pathlib -import unittest import earthaccess +import earthaccess.daac import pytest import requests import s3fs logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - -NETRC_PATH = pathlib.Path.home() / pathlib.Path(".netrc") - - -def activate_environment(): - earthaccess.__auth__ = earthaccess.Auth() - # the original comes from github secrets - os.environ["EARTHDATA_USERNAME"] = 
os.getenv("EARTHACCESS_TEST_USERNAME", "") - os.environ["EARTHDATA_PASSWORD"] = os.getenv("EARTHACCESS_TEST_PASSWORD", "") - - -def activate_netrc(): - activate_environment() - username = os.environ["EARTHDATA_USERNAME"] - password = os.environ["EARTHDATA_PASSWORD"] - - with open(NETRC_PATH, "w") as f: - f.write( - f"machine urs.earthdata.nasa.gov login {username} password {password}\n" - ) - NETRC_PATH.chmod(0o600) - - -def delete_netrc(): - if NETRC_PATH.exists(): - NETRC_PATH.unlink() def test_auth_can_read_earthdata_env_variables(): - activate_environment() auth = earthaccess.login(strategy="environment") logger.info(f"Current username: {auth.username}") logger.info(f"earthaccess version: {earthaccess.__version__}") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_auth_can_read_from_netrc_file(): - activate_netrc() +def test_auth_can_read_from_netrc_file(mock_netrc): auth = earthaccess.login(strategy="netrc") - assertions.assertTrue(auth.authenticated) - delete_netrc() + assert auth.authenticated -def test_auth_throws_exception_if_netrc_is_not_present(): - activate_environment() - delete_netrc() - with pytest.raises(Exception): +def test_auth_throws_exception_if_netrc_is_not_present(mock_missing_netrc): + with pytest.raises(FileNotFoundError): earthaccess.login(strategy="netrc") - assertions.assertRaises(FileNotFoundError) def test_auth_populates_attrs(): - activate_environment() auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert 
isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated def test_auth_can_create_authenticated_requests_sessions(): - activate_environment() session = earthaccess.get_requests_https_session() - assertions.assertTrue("Authorization" in session.headers) - assertions.assertTrue("Bearer" in session.headers["Authorization"]) + assert "Authorization" in session.headers + assert "Bearer" in session.headers["Authorization"] # type: ignore -@pytest.mark.parametrize("daac", earthaccess.daac.DAACS) +@pytest.mark.parametrize( + "daac", [daac for daac in earthaccess.daac.DAACS if daac["s3-credentials"]] +) def test_auth_can_fetch_s3_credentials(daac): - activate_environment() auth = earthaccess.login(strategy="environment") assert auth.authenticated + try: credentials = earthaccess.get_s3_credentials(daac["short-name"]) except requests.RequestException as e: @@ -96,9 +60,9 @@ def test_auth_can_fetch_s3_credentials(daac): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) def test_get_s3_credentials_lowercase_location(location): - activate_environment() earthaccess.login(strategy="environment") creds = earthaccess.get_s3_credentials(**location) + assert creds assert all( creds[key] @@ -108,8 +72,8 @@ def test_get_s3_credentials_lowercase_location(location): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) def test_get_s3_filesystem_lowercase_location(location): - activate_environment() earthaccess.login(strategy="environment") fs = earthaccess.get_s3_filesystem(**location) + assert isinstance(fs, s3fs.S3FileSystem) assert all(fs.storage_options[key] for key in ["key", "secret", "token"]) diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py index 4e8f9519..11fab5a1 100644 --- a/tests/integration/test_cloud_download.py +++ b/tests/integration/test_cloud_download.py @@ -1,9 +1,6 @@ -# package imports import logging -import os import 
random import shutil -import unittest from pathlib import Path import earthaccess @@ -56,20 +53,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -83,11 +66,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -96,7 +77,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): @pytest.mark.parametrize("daac", daac_list) -def test_earthaccess_can_download_cloud_collection_granules(daac): +def test_earthaccess_can_download_cloud_collection_granules(tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -109,17 +90,17 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = 
random.sample(collections, collections_sample_size) + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) assert isinstance(granules, list) and len(granules) > 0 - assert isinstance(granules[0], earthaccess.results.DataGranule) - local_path = f"./tests/integration/data/{concept_id}" + assert isinstance(granules[0], earthaccess.DataGranule) granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -132,14 +113,16 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) - # We are testing this method + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) + try: - store.get(granules_to_download, local_path=local_path) - except Exception: - logger.warning(Exception) + # We are testing this method + store.get(granules_to_download, local_path=path) + except Exception as e: + logger.warning(e) - path = Path(local_path) - assert path.is_dir() # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2) @@ -163,4 +146,4 @@ def test_multi_file_granule(tmp_path): urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.download(granules, str(tmp_path)) - assert set([Path(f).name for f in urls]) == set([Path(f).name for f in files]) + assert {Path(f).name for f in urls} == {Path(f).name for f in files} diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py index b69eba15..a0ca5501 100644 --- a/tests/integration/test_cloud_open.py +++ b/tests/integration/test_cloud_open.py @@ -1,8 +1,5 @@ -# package imports import logging 
-import os import random -import unittest import earthaccess import magic @@ -55,20 +52,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -82,11 +65,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -95,10 +76,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) @@ -115,17 +93,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled 
{len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -143,10 +122,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: @@ -163,4 +143,4 @@ def test_multi_file_granule(): urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.open(granules) - assert set(urls) == set(f.path for f in files) + assert set(urls) == {f.path for f in files} diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py index 2e981cce..90a71d19 100644 --- a/tests/integration/test_kerchunk.py +++ b/tests/integration/test_kerchunk.py @@ -1,39 +1,25 @@ import logging -import os -import unittest from pathlib import Path import earthaccess import pytest from fsspec.core import strip_protocol -kerchunk = pytest.importorskip("kerchunk") -pytest.importorskip("dask") - logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: 
{os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") @pytest.fixture(scope="module") def granules(): - granules = earthaccess.search_data( + return earthaccess.search_data( count=2, short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205", cloud_hosted=True, ) - return granules @pytest.mark.parametrize("protocol", ["", "file://"]) def test_consolidate_metadata_outfile(tmp_path, granules, protocol): outfile = f"{protocol}{tmp_path / 'metadata.json'}" - assert not Path(outfile).exists() result = earthaccess.consolidate_metadata( granules, outfile=outfile, @@ -44,7 +30,7 @@ def test_consolidate_metadata_outfile(tmp_path, granules, protocol): assert result == outfile -def test_consolidate_metadata_memory(tmp_path, granules): +def test_consolidate_metadata_memory(granules): result = earthaccess.consolidate_metadata( granules, access="indirect", @@ -56,15 +42,17 @@ def test_consolidate_metadata_memory(tmp_path, granules): @pytest.mark.parametrize("output", ["file", "memory"]) def test_consolidate_metadata(tmp_path, granules, output): - xr = pytest.importorskip("xarray") + # We import here because xarray is installed only when the kerchunk extra is + # installed, and when type checking is run, kerchunk (and thus xarray) is + # not installed, so mypy barfs when this is a top-level import. Further, + # mypy complains even when imported here, but here we can mark it to ignore. 
+ import xarray as xr # type: ignore + # Open directly with `earthaccess.open` - expected = xr.open_mfdataset(earthaccess.open(granules)) + expected = xr.open_mfdataset(earthaccess.open(granules), engine="h5netcdf") # Open with kerchunk consolidated metadata file - if output == "file": - kwargs = {"outfile": tmp_path / "metadata.json"} - else: - kwargs = {} + kwargs = {"outfile": tmp_path / "metadata.json"} if output == "file" else {} metadata = earthaccess.consolidate_metadata( granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs ) diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py index 242a3c26..d728be6c 100644 --- a/tests/integration/test_onprem_download.py +++ b/tests/integration/test_onprem_download.py @@ -1,10 +1,6 @@ -# package imports import logging -import os import random import shutil -import unittest -from pathlib import Path import earthaccess import pytest @@ -38,30 +34,8 @@ "granules_sample_size": 2, "granules_max_size_mb": 100, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 3, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -75,11 +49,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: 
{g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -88,14 +60,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_download_onprem_collection_granules(daac): +def test_earthaccess_can_download_onprem_collection_granules(tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -108,22 +77,22 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not 
supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") continue - local_path = f"./tests/integration/data/{concept_id}" granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -136,14 +105,15 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) # We are testing this method - downloaded_results = store.get(granules_to_download, local_path=local_path) + downloaded_results = store.get(granules_to_download, local_path=path) - assertions.assertTrue(isinstance(downloaded_results, list)) - assertions.assertTrue(len(downloaded_results) == granules_sample_size) + assert isinstance(downloaded_results, list) + assert len(downloaded_results) >= granules_sample_size - path = Path(local_path) - assertions.assertTrue(path.is_dir()) # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2 diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py index 2a455c44..dbc41994 100644 --- a/tests/integration/test_onprem_open.py +++ b/tests/integration/test_onprem_open.py @@ -1,8 +1,5 @@ -# package imports import logging -import os import random -import unittest import earthaccess import magic @@ -37,30 +34,8 @@ "granules_sample_size": 2, "granules_max_size_mb": 130, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 2, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in 
os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -74,11 +49,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -87,10 +60,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) @@ -107,17 +77,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) 
- assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -135,10 +106,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: diff --git a/tests/integration/test_services.py b/tests/integration/test_services.py index 04de0dce..2d1eb34c 100644 --- a/tests/integration/test_services.py +++ b/tests/integration/test_services.py @@ -1,21 +1,6 @@ -# package imports -import logging -import os -import unittest - import earthaccess -from earthaccess.api import search_datasets from vcr.unittest import VCRTestCase # type: ignore[import-untyped] -assertions = unittest.TestCase("__init__") -logger = logging.getLogger(__name__) - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - class TestServices(VCRTestCase): def scrub_access_token(self, string, replacement=""): @@ -41,7 +26,7 @@ def _get_vcr(self, **kwargs): def test_services(self): """Test that a list of services can be retrieved.""" - datasets = search_datasets( + datasets = earthaccess.search_datasets( short_name="MUR-JPL-L4-GLOB-v4.1", cloud_hosted=True, temporal=("2024-02-27T00:00:00Z", "2024-02-29T00:00:00Z"), diff --git 
a/uv.lock b/uv.lock index de79bc42..1bb4fb99 100644 --- a/uv.lock +++ b/uv.lock @@ -861,7 +861,6 @@ dependencies = [ { name = "fsspec" }, { name = "importlib-resources" }, { name = "multimethod" }, - { name = "numpy" }, { name = "pqdm" }, { name = "python-cmr" }, { name = "requests" }, @@ -901,10 +900,19 @@ docs = [ ] kerchunk = [ { name = "dask" }, + { name = "h5netcdf" }, + { name = "h5py" }, { name = "kerchunk" }, + { name = "numpy" }, + { name = "xarray" }, ] test = [ + { name = "dask" }, + { name = "h5netcdf" }, + { name = "h5py" }, + { name = "kerchunk" }, { name = "mypy" }, + { name = "numpy" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-watch" }, @@ -914,6 +922,7 @@ test = [ { name = "types-requests", version = "2.32.0.20240907", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' and python_full_version < '3.12' and platform_python_implementation != 'PyPy'" }, { name = "types-setuptools" }, { name = "vcrpy" }, + { name = "xarray" }, ] [package.metadata] @@ -922,8 +931,11 @@ requires-dist = [ { name = "cftime", marker = "extra == 'docs'", specifier = ">=1.6.4" }, { name = "dask", marker = "extra == 'docs'", specifier = ">=2024.8.0" }, { name = "dask", marker = "extra == 'kerchunk'" }, + { name = "earthaccess", extras = ["kerchunk"], marker = "extra == 'test'" }, { name = "fsspec", specifier = ">=2022.11" }, { name = "h5netcdf", marker = "extra == 'docs'", specifier = ">=0.11" }, + { name = "h5netcdf", marker = "extra == 'kerchunk'" }, + { name = "h5py", marker = "extra == 'kerchunk'", specifier = ">=3.0" }, { name = "importlib-resources", specifier = ">=6.3.2" }, { name = "ipywidgets", marker = "extra == 'docs'", specifier = ">=7.7.0" }, { name = "jupyterlab", marker = "extra == 'docs'", specifier = ">=3" }, @@ -940,8 +952,8 @@ requires-dist = [ { name = "multimethod", specifier = ">=1.8" }, { name = "mypy", marker = "extra == 'test'", specifier = ">=1.11.2" }, { name = "nox", marker = "extra == 
'dev'" }, - { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.24.0" }, - { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, + { name = "numpy", marker = "python_full_version >= '3.12' and extra == 'kerchunk'", specifier = ">=1.26.0" }, + { name = "numpy", marker = "python_full_version < '3.12' and extra == 'kerchunk'", specifier = ">=1.24.0" }, { name = "pqdm", specifier = ">=0.1" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=2.4" }, { name = "pygments", marker = "extra == 'docs'", specifier = ">=2.11.1" }, @@ -965,6 +977,7 @@ requires-dist = [ { name = "vcrpy", marker = "extra == 'test'", specifier = ">=6.0.1" }, { name = "widgetsnbextension", marker = "extra == 'docs'", specifier = ">=3.6.0" }, { name = "xarray", marker = "extra == 'docs'", specifier = ">=2023.1" }, + { name = "xarray", marker = "extra == 'kerchunk'" }, ] [[package]]