From 349a140e057021aff83a2c5dd704b9fa578a9a4d Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 29 Nov 2023 21:43:41 -0900 Subject: [PATCH 01/11] switch black,autoflake,flake8,isort to ruff --- .flake8 | 3 - .pre-commit-config.yaml | 8 +- poetry.lock | 157 +++++++--------------------------------- pyproject.toml | 31 +++++++- scripts/format.sh | 5 +- scripts/lint.sh | 3 +- 6 files changed, 63 insertions(+), 144 deletions(-) delete mode 100644 .flake8 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 38ffcd9b..00000000 --- a/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 120 -exclude = mypy-stubs,stubs,typeshed diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fffe4a1d..4287d6bc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,9 @@ repos: - id: trailing-whitespace - id: check-toml - id: check-json -- repo: https://github.com/psf/black - rev: 23.10.1 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 hooks: - - id: black + - id: ruff + args: [ --fix ] + - id: ruff-format diff --git a/poetry.lock b/poetry.lock index a843ad59..dd2916a4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiobotocore" @@ -319,21 +319,6 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["attrs[tests-no-zope]", "zope-interface"] tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -[[package]] -name = "autoflake" -version = "1.7.8" -description = "Removes unused imports and unused variables" -optional = false -python-versions = ">=3.7" -files = [ - {file = "autoflake-1.7.8-py3-none-any.whl", hash = "sha256:46373ef69b6714f5064c923bb28bd797c4f8a9497f557d87fc36665c6d956b39"}, - {file = "autoflake-1.7.8.tar.gz", hash = "sha256:e7e46372dee46fa1c97acf310d99d922b63d369718a270809d7c278d34a194cf"}, -] - -[package.dependencies] -pyflakes = ">=1.1.0,<3" -tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} - [[package]] name = "babel" version = "2.12.1" @@ -377,52 +362,6 @@ soupsieve = ">1.2" html5lib = ["html5lib"] lxml = ["lxml"] -[[package]] -name = "black" -version = "23.7.0" -description = "The uncompromising code formatter." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "black-23.7.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:5c4bc552ab52f6c1c506ccae05681fab58c3f72d59ae6e6639e8885e94fe2587"}, - {file = "black-23.7.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:552513d5cd5694590d7ef6f46e1767a4df9af168d449ff767b13b084c020e63f"}, - {file = "black-23.7.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:86cee259349b4448adb4ef9b204bb4467aae74a386bce85d56ba4f5dc0da27be"}, - {file = "black-23.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501387a9edcb75d7ae8a4412bb8749900386eaef258f1aefab18adddea1936bc"}, - {file = "black-23.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb074d8b213749fa1d077d630db0d5f8cc3b2ae63587ad4116e8a436e9bbe995"}, - {file = "black-23.7.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:b5b0ee6d96b345a8b420100b7d71ebfdd19fab5e8301aff48ec270042cd40ac2"}, - {file = "black-23.7.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:893695a76b140881531062d48476ebe4a48f5d1e9388177e175d76234ca247cd"}, - {file = "black-23.7.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:c333286dc3ddca6fdff74670b911cccedacb4ef0a60b34e491b8a67c833b343a"}, - {file = "black-23.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831d8f54c3a8c8cf55f64d0422ee875eecac26f5f649fb6c1df65316b67c8926"}, - {file = "black-23.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:7f3bf2dec7d541b4619b8ce526bda74a6b0bffc480a163fed32eb8b3c9aed8ad"}, - {file = "black-23.7.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:f9062af71c59c004cd519e2fb8f5d25d39e46d3af011b41ab43b9c74e27e236f"}, - {file = "black-23.7.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:01ede61aac8c154b55f35301fac3e730baf0c9cf8120f65a9cd61a81cfb4a0c3"}, - {file = "black-23.7.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:327a8c2550ddc573b51e2c352adb88143464bb9d92c10416feb86b0f5aee5ff6"}, - {file = "black-23.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1c6022b86f83b632d06f2b02774134def5d4d4f1dac8bef16d90cda18ba28a"}, - {file = "black-23.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:27eb7a0c71604d5de083757fbdb245b1a4fae60e9596514c6ec497eb63f95320"}, - {file = "black-23.7.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:8417dbd2f57b5701492cd46edcecc4f9208dc75529bcf76c514864e48da867d9"}, - {file = "black-23.7.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:47e56d83aad53ca140da0af87678fb38e44fd6bc0af71eebab2d1f59b1acf1d3"}, - {file = "black-23.7.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:25cc308838fe71f7065df53aedd20327969d05671bac95b38fdf37ebe70ac087"}, - {file = "black-23.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:642496b675095d423f9b8448243336f8ec71c9d4d57ec17bf795b67f08132a91"}, - {file = "black-23.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:ad0014efc7acf0bd745792bd0d8857413652979200ab924fbf239062adc12491"}, - {file = "black-23.7.0-py3-none-any.whl", hash = "sha256:9fd59d418c60c0348505f2ddf9609c1e1de8e7493eab96198fc89d9f865e7a96"}, - {file = "black-23.7.0.tar.gz", hash = "sha256:022a582720b0d9480ed82576c920a8c1dde97cc38ff11d8d8859b3bd6ca9eedb"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} - 
-[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.7.4)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - [[package]] name = "bleach" version = "6.0.0" @@ -1046,22 +985,6 @@ files = [ docs = ["furo (>=2023.5.20)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] -[[package]] -name = "flake8" -version = "5.0.4" -description = "the modular source code checker: pep8 pyflakes and co" -optional = false -python-versions = ">=3.6.1" -files = [ - {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, - {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, -] - -[package.dependencies] -mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.9.0,<2.10.0" -pyflakes = ">=2.5.0,<2.6.0" - [[package]] name = "fonttools" version = "4.41.1" @@ -1494,23 +1417,6 @@ files = [ [package.dependencies] arrow = ">=0.15.0" -[[package]] -name = "isort" -version = "5.12.0" -description = "A Python utility / library to sort Python imports." -optional = false -python-versions = ">=3.8.0" -files = [ - {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"}, - {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"}, -] - -[package.extras] -colors = ["colorama (>=0.4.3)"] -pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"] -plugins = ["setuptools"] -requirements-deprecated-finder = ["pip-api", "pipreqs"] - [[package]] name = "jedi" version = "0.19.0" @@ -2283,17 +2189,6 @@ files = [ [package.dependencies] traitlets = "*" -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -optional = false -python-versions = ">=3.6" -files = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] - [[package]] name = "mdit-py-plugins" version = "0.4.0" @@ -3184,17 +3079,6 @@ files = [ [package.extras] tests = ["pytest"] -[[package]] -name = "pycodestyle" -version = "2.9.1" -description = "Python style guide checker" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"}, - {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, -] - [[package]] name = "pycparser" version = "2.21" @@ -3258,17 +3142,6 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] -[[package]] -name = "pyflakes" -version = "2.5.0" -description = "passive checker of Python programs" -optional = false -python-versions = ">=3.6" -files = [ - {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, - {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, -] - [[package]] name = "pygments" version = "2.15.1" @@ -3899,6 +3772,32 @@ 
files = [ {file = "rpds_py-0.9.2.tar.gz", hash = "sha256:8d70e8f14900f2657c249ea4def963bed86a29b81f81f5b76b5a9215680de945"}, ] +[[package]] +name = "ruff" +version = "0.1.6" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:88b8cdf6abf98130991cbc9f6438f35f6e8d41a02622cc5ee130a02a0ed28703"}, + {file = "ruff-0.1.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c549ed437680b6105a1299d2cd30e4964211606eeb48a0ff7a93ef70b902248"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cf5f701062e294f2167e66d11b092bba7af6a057668ed618a9253e1e90cfd76"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:05991ee20d4ac4bb78385360c684e4b417edd971030ab12a4fbd075ff535050e"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87455a0c1f739b3c069e2f4c43b66479a54dea0276dd5d4d67b091265f6fd1dc"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:683aa5bdda5a48cb8266fcde8eea2a6af4e5700a392c56ea5fb5f0d4bfdc0240"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:137852105586dcbf80c1717facb6781555c4e99f520c9c827bd414fac67ddfb6"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd98138a98d48a1c36c394fd6b84cd943ac92a08278aa8ac8c0fdefcf7138f35"}, + {file = "ruff-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0cd909d25f227ac5c36d4e7e681577275fb74ba3b11d288aff7ec47e3ae745"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8fd1c62a47aa88a02707b5dd20c5ff20d035d634aa74826b42a1da77861b5ff"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:fd89b45d374935829134a082617954120d7a1470a9f0ec0e7f3ead983edc48cc"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:491262006e92f825b145cd1e52948073c56560243b55fb3b4ecb142f6f0e9543"}, + {file = "ruff-0.1.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ea284789861b8b5ca9d5443591a92a397ac183d4351882ab52f6296b4fdd5462"}, + {file = "ruff-0.1.6-py3-none-win32.whl", hash = "sha256:1610e14750826dfc207ccbcdd7331b6bd285607d4181df9c1c6ae26646d6848a"}, + {file = "ruff-0.1.6-py3-none-win_amd64.whl", hash = "sha256:4558b3e178145491e9bc3b2ee3c4b42f19d19384eaa5c59d10acf6e8f8b57e33"}, + {file = "ruff-0.1.6-py3-none-win_arm64.whl", hash = "sha256:03910e81df0d8db0e30050725a5802441c2022ea3ae4fe0609b76081731accbc"}, + {file = "ruff-0.1.6.tar.gz", hash = "sha256:1b09f29b16c6ead5ea6b097ef2764b42372aebe363722f1605ecbcd2b9207184"}, +] + [[package]] name = "s3fs" version = "2023.6.0" @@ -4591,4 +4490,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "0a1997946e1e0a3050c5d40459b0220493a8ede99ad08e7b69476ed9f004821a" +content-hash = "e426c0c43eaa11a5591d1eab3ee1c21b0cb6e833b12ec8352a46c8a3fdad5b9e" diff --git a/pyproject.toml b/pyproject.toml index 88b85ceb..09788617 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,10 +43,6 @@ multimethod = ">=1.8" [tool.poetry.dev-dependencies] python-magic = ">=0.4" mypy = ">=0.812" -black = ">=21.11b0" -isort = ">=5" -autoflake = ">=1.3" -flake8 = ">=3.7" jupyterlab = ">=3" 
pre-commit = ">=2.4" pytest = ">=6.0" @@ -73,6 +69,9 @@ dask = ">=2022.1" pyproj = ">=3.5.0" bump-my-version = ">=0.10.0" +[tool.poetry.group.dev.dependencies] +ruff = "^0.1.6" + [build-system] requires = ["poetry>=0.12"] build-backend = "poetry.masonry.api" @@ -93,6 +92,30 @@ module = [ ignore_errors = true +[tool.ruff] +line-length = 120 +src = ["earthaccess", "tests"] +exclude = ["mypy-stubs", "stubs", "typeshed"] + +[tool.ruff.format] +indent-style = "space" +quote-style = "single" + +[tool.ruff.lint] +extend-select = ["I"] + +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.isort] +combine-as-imports = true +# unsupported isort arguments: +# --multi-line=3 +# --trailing-comma +# --force-grid-wrap=0 +# --line-width 88 + + [tool.bumpversion] current_version = "0.8.0" commit = false diff --git a/scripts/format.sh b/scripts/format.sh index 6a23d027..aaeb10e5 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -1,6 +1,5 @@ #!/bin/sh -e set -x -autoflake --remove-all-unused-imports --recursive --remove-unused-variables --in-place earthaccess tests --exclude=__init__.py -black earthaccess tests -isort --multi-line=3 --trailing-comma --force-grid-wrap=0 --combine-as --line-width 88 --recursive --thirdparty earthaccess --apply earthaccess tests +ruff check --fix earthaccess tests +ruff format earthaccess tests diff --git a/scripts/lint.sh b/scripts/lint.sh index 53e22c21..b8f7088f 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -4,5 +4,4 @@ set -e set -x mypy earthaccess --disallow-untyped-defs -black earthaccess tests --check -isort --multi-line=3 --trailing-comma --force-grid-wrap=0 --combine-as --line-width 88 --check-only --thirdparty earthaccess tests +ruff check earthaccess tests From dbe1b2b33030c158624849cead6aaee0be731092 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 29 Nov 2023 21:49:27 -0900 Subject: [PATCH 02/11] ruff format and check --fix --- earthaccess/__init__.py | 9 +-- earthaccess/api.py | 19 ++---- earthaccess/auth.py | 64 +++++------------- earthaccess/daac.py | 23 ++----- earthaccess/formatters.py | 5 +- earthaccess/results.py | 34 ++-------- earthaccess/search.py | 34 +++------- earthaccess/store.py | 81 ++++++----------------- pyproject.toml | 12 ---- tests/integration/test_auth.py | 13 +--- tests/integration/test_cloud_download.py | 15 ++--- tests/integration/test_cloud_open.py | 11 +-- tests/integration/test_onprem_download.py | 15 ++--- tests/integration/test_onprem_open.py | 11 +-- tests/unit/test_auth.py | 4 +- 15 files changed, 84 insertions(+), 266 deletions(-) diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index 4e71e69d..18750843 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -1,7 +1,6 @@ import logging import threading from importlib.metadata import version -from typing import Any from .api import ( auth_environ, @@ -65,18 +64,14 @@ def __getattr__(name): # type: ignore try: _auth.login(strategy=strategy) except Exception as e: - logger.debug( - f"An error occurred during automatic authentication with {strategy=}: {str(e)}" - ) + logger.debug(f"An error occurred during automatic authentication with {strategy=}: {str(e)}") continue else: if not _auth.authenticated: continue else: _store = Store(_auth) - logger.debug( - f"Automatic authentication with {strategy=} was successful" - ) + logger.debug(f"Automatic authentication with {strategy=} was successful") break return _auth if name == "__auth__" else _store else: diff --git a/earthaccess/api.py b/earthaccess/api.py index 
b2714312..e6a1d86c 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -1,10 +1,11 @@ from typing import Any, Dict, List, Optional, Type, Union -import earthaccess import requests import s3fs from fsspec import AbstractFileSystem +import earthaccess + from .auth import Auth from .results import DataGranule from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery @@ -27,9 +28,7 @@ def _normalize_location(location: Union[str, None]) -> Union[str, None]: return location -def search_datasets( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataCollection]: +def search_datasets(count: int = -1, **kwargs: Any) -> List[earthaccess.results.DataCollection]: """Search datasets using NASA's CMR [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -65,9 +64,7 @@ def search_datasets( ``` """ if not validate.valid_dataset_parameters(**kwargs): - print( - "Warning: a valid set of parameters is needed to search for datasets on CMR" - ) + print("Warning: a valid set of parameters is needed to search for datasets on CMR") return [] if earthaccess.__auth__.authenticated: query = DataCollections(auth=earthaccess.__auth__).parameters(**kwargs) @@ -80,9 +77,7 @@ def search_datasets( return query.get_all() -def search_data( - count: int = -1, **kwargs: Any -) -> List[earthaccess.results.DataGranule]: +def search_data(count: int = -1, **kwargs: Any) -> List[earthaccess.results.DataGranule]: """Search dataset granules using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) @@ -357,7 +352,5 @@ def get_edl_token() -> str: def auth_environ() -> Dict[str, str]: auth = earthaccess.__auth__ if not auth.authenticated: - raise RuntimeError( - "`auth_environ()` requires you to first authenticate with `earthaccess.login()`" - ) + raise RuntimeError("`auth_environ()` requires you to first authenticate with `earthaccess.login()`") return {"EARTHDATA_USERNAME": auth.username, "EARTHDATA_PASSWORD": auth.password} diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 9a3b22cb..d25129d8 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -23,9 +23,7 @@ class SessionWithHeaderRedirection(requests.Session): AUTH_HOST = "urs.earthdata.nasa.gov" - def __init__( - self, username: Optional[str] = None, password: Optional[str] = None - ) -> None: + def __init__(self, username: Optional[str] = None, password: Optional[str] = None) -> None: super().__init__() if username and password: self.auth = (username, password) @@ -95,9 +93,7 @@ def refresh_tokens(self) -> bool: This method renews the tokens to make sure we can query the collections allowed to our EDL user. 
""" if len(self.tokens) == 0: - resp_tokens = self._generate_user_token( - username=self.username, password=self.password - ) + resp_tokens = self._generate_user_token(username=self.username, password=self.password) if resp_tokens.ok: self.token = resp_tokens.json() self.tokens = [self.token] @@ -108,9 +104,7 @@ def refresh_tokens(self) -> bool: else: return False if len(self.tokens) == 1: - resp_tokens = self._generate_user_token( - username=self.username, password=self.password - ) + resp_tokens = self._generate_user_token(username=self.username, password=self.password) if resp_tokens.ok: self.token = resp_tokens.json() self.tokens.extend(self.token) @@ -124,9 +118,7 @@ def refresh_tokens(self) -> bool: if len(self.tokens) == 2: resp_revoked = self._revoke_user_token(self.token["access_token"]) if resp_revoked: - resp_tokens = self._generate_user_token( - username=self.username, password=self.password - ) + resp_tokens = self._generate_user_token(username=self.username, password=self.password) if resp_tokens.ok: self.token = resp_tokens.json() self.tokens[0] = self.token @@ -161,34 +153,22 @@ def get_s3_credentials( if self.authenticated: session = SessionWithHeaderRedirection(self.username, self.password) if endpoint is None: - auth_url = self._get_cloud_auth_url( - daac_shortname=daac, provider=provider - ) + auth_url = self._get_cloud_auth_url(daac_shortname=daac, provider=provider) else: auth_url = endpoint if auth_url.startswith("https://"): cumulus_resp = session.get(auth_url, timeout=15, allow_redirects=True) - auth_resp = session.get( - cumulus_resp.url, allow_redirects=True, timeout=15 - ) + auth_resp = session.get(cumulus_resp.url, allow_redirects=True, timeout=15) if not (auth_resp.ok): # type: ignore # Let's try to authenticate with Bearer tokens _session = self.get_session() - cumulus_resp = _session.get( - auth_url, timeout=15, allow_redirects=True - ) - auth_resp = _session.get( - cumulus_resp.url, allow_redirects=True, timeout=15 - ) + cumulus_resp = _session.get(auth_url, timeout=15, allow_redirects=True) + auth_resp = _session.get(cumulus_resp.url, allow_redirects=True, timeout=15) if not (auth_resp.ok): - print( - f"Authentication with Earthdata Login failed with:\n{auth_resp.text[0:1000]}" - ) + print(f"Authentication with Earthdata Login failed with:\n{auth_resp.text[0:1000]}") eula_url = "https://urs.earthdata.nasa.gov/users/earthaccess/unaccepted_eulas" apps_url = "https://urs.earthdata.nasa.gov/application_search" - print( - f"Consider accepting the EULAs available at {eula_url} and applications at {apps_url}" - ) + print(f"Consider accepting the EULAs available at {eula_url} and applications at {apps_url}") return {} return auth_resp.json() @@ -214,9 +194,7 @@ class Session instance with Auth and bearer token headers if bearer_token and self.authenticated: # This will avoid the use of the netrc after we are logged in session.trust_env = False - session.headers.update( - {"Authorization": f'Bearer {self.token["access_token"]}'} - ) + session.headers.update({"Authorization": f'Bearer {self.token["access_token"]}'}) return session def get_user_profile(self) -> Dict[str, Any]: @@ -243,9 +221,7 @@ def _netrc(self) -> bool: try: my_netrc = Netrc() except FileNotFoundError as err: - raise FileNotFoundError( - f"No .netrc found in {os.path.expanduser('~')}" - ) from err + raise FileNotFoundError(f"No .netrc found in {os.path.expanduser('~')}") from err except NetrcParseError as err: raise NetrcParseError("Unable to parse .netrc") from err if 
my_netrc["urs.earthdata.nasa.gov"] is not None: @@ -271,16 +247,12 @@ def _environment(self) -> bool: ) return authenticated - def _get_credentials( - self, username: Optional[str], password: Optional[str] - ) -> bool: + def _get_credentials(self, username: Optional[str], password: Optional[str]) -> bool: if username is not None and password is not None: token_resp = self._get_user_tokens(username, password) if not (token_resp.ok): # type: ignore - print( - f"Authentication with Earthdata Login failed with:\n{token_resp.text}" - ) + print(f"Authentication with Earthdata Login failed with:\n{token_resp.text}") return False logger.debug("You're now authenticated with NASA Earthdata Login") self.username = username @@ -297,9 +269,7 @@ def _get_credentials( self.token = self.tokens[0] elif len(self.tokens) > 0: self.token = self.tokens[0] - logger.debug( - f"Using token with expiration date: {self.token['expiration_date']}" - ) + logger.debug(f"Using token with expiration date: {self.token['expiration_date']}") profile = self.get_user_profile() if "email_address" in profile: self.user_profile = profile @@ -360,9 +330,7 @@ def _persist_user_credentials(self, username: str, password: str) -> bool: my_netrc.save() return True - def _get_cloud_auth_url( - self, daac_shortname: Optional[str] = "", provider: Optional[str] = "" - ) -> str: + def _get_cloud_auth_url(self, daac_shortname: Optional[str] = "", provider: Optional[str] = "") -> str: for daac in DAACS: if ( daac_shortname == daac["short-name"] diff --git a/earthaccess/daac.py b/earthaccess/daac.py index 5581f2c5..b17c02a7 100644 --- a/earthaccess/daac.py +++ b/earthaccess/daac.py @@ -110,18 +110,9 @@ # Some testing urls behind EDL DAAC_TEST_URLS = [ - ( - "https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/" - "JASON_CS_S6A_L2_ALT_LR_STD_OST_NRT_F/" - ), - ( - "https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/" - "ATL03/005/2018/10/14/dummy.nc" - ), - ( - "https://n5eil01u.ecs.nsidc.org/DP7/ATLAS/ATL06.005/2018.10.14/" - "ATL06_20181014045341_02380102_005_01.iso.xml" - ), + ("https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/" "JASON_CS_S6A_L2_ALT_LR_STD_OST_NRT_F/"), + ("https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/" "ATL03/005/2018/10/14/dummy.nc"), + ("https://n5eil01u.ecs.nsidc.org/DP7/ATLAS/ATL06.005/2018.10.14/" "ATL06_20181014045341_02380102_005_01.iso.xml"), ("https://hydro1.gesdisc.eosdis.nasa.gov/data/GLDAS/GLDAS_NOAH10_M.2.0/1948/"), ( "https://e4ftl01.cr.usgs.gov//DP114/MOTA/MCD43A3.006/2000.02.24/" @@ -131,9 +122,7 @@ ] -def find_provider( - daac_short_name: Optional[str] = None, cloud_hosted: Optional[bool] = None -) -> Union[str, None]: +def find_provider(daac_short_name: Optional[str] = None, cloud_hosted: Optional[bool] = None) -> Union[str, None]: for daac in DAACS: if daac_short_name == daac["short-name"]: if cloud_hosted: @@ -150,9 +139,7 @@ def find_provider( def find_provider_by_shortname(short_name: str, cloud_hosted: bool) -> Union[str, None]: base_url = "https://cmr.earthdata.nasa.gov/search/collections.umm_json?" 
- providers = requests.get( - f"{base_url}&cloud_hosted={cloud_hosted}&short_name={short_name}" - ).json() + providers = requests.get(f"{base_url}&cloud_hosted={cloud_hosted}&short_name={short_name}").json() if int(providers["hits"]) > 0: return providers["items"][0]["meta"]["provider-id"] else: diff --git a/earthaccess/formatters.py b/earthaccess/formatters.py index d37d4e22..c35e9e3d 100644 --- a/earthaccess/formatters.py +++ b/earthaccess/formatters.py @@ -8,10 +8,7 @@ def _load_static_files() -> List[str]: """Load styles""" - return [ - pkg_resources.resource_string("earthaccess", fname).decode("utf8") - for fname in STATIC_FILES - ] + return [pkg_resources.resource_string("earthaccess", fname).decode("utf8") for fname in STATIC_FILES] def _repr_collection_html() -> str: diff --git a/earthaccess/results.py b/earthaccess/results.py index f0bb2167..37e8582b 100644 --- a/earthaccess/results.py +++ b/earthaccess/results.py @@ -28,17 +28,9 @@ def __init__( self.render_dict = self._filter_fields_(fields) def _filter_fields_(self, fields: List[str]) -> Dict[str, Any]: - filtered_dict = { - "umm": dict( - (field, self["umm"][field]) for field in fields if field in self["umm"] - ) - } + filtered_dict = {"umm": dict((field, self["umm"][field]) for field in fields if field in self["umm"])} basic_dict = { - "meta": dict( - (field, self["meta"][field]) - for field in self._basic_meta_fields_ - if field in self["meta"] - ) + "meta": dict((field, self["meta"][field]) for field in self._basic_meta_fields_ if field in self["meta"]) } basic_dict.update(filtered_dict) return basic_dict @@ -121,11 +113,7 @@ def data_type(self) -> str: If available, it returns the collection data type, i.e. HDF5, CSV etc """ if "ArchiveAndDistributionInformation" in self["umm"]: - return str( - self["umm"]["ArchiveAndDistributionInformation"][ - "FileDistributionInformation" - ] - ) + return str(self["umm"]["ArchiveAndDistributionInformation"]["FileDistributionInformation"]) return "" def version(self) -> str: @@ -174,9 +162,7 @@ def s3_bucket(self) -> Dict[str, Any]: return {} def __repr__(self) -> str: - return json.dumps( - self.render_dict, sort_keys=False, indent=2, separators=(",", ": ") - ) + return json.dumps(self.render_dict, sort_keys=False, indent=2, separators=(",", ": ")) class DataGranule(CustomDict): @@ -228,9 +214,7 @@ def __repr__(self) -> str: Temporal coverage: {self['umm']['TemporalExtent']} Size(MB): {self.size()} Data: {data_links}\n\n - """.strip().replace( - " ", "" - ) + """.strip().replace(" ", "") return rep_str def _repr_html_(self) -> str: @@ -280,15 +264,11 @@ def _derive_s3_link(self, links: List[str]) -> List[str]: for link in links: if link.startswith("s3"): s3_links.append(link) - elif link.startswith("https://") and ( - "cumulus" in link or "protected" in link - ): + elif link.startswith("https://") and ("cumulus" in link or "protected" in link): s3_links.append(f's3://{links[0].split("nasa.gov/")[1]}') return s3_links - def data_links( - self, access: Optional[str] = None, in_region: bool = False - ) -> List[str]: + def data_links(self, access: Optional[str] = None, in_region: bool = False) -> List[str]: """Returns the data links form a granule Parameters: diff --git a/earthaccess/search.py b/earthaccess/search.py index 0ed3b61d..70fab447 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -243,10 +243,7 @@ def get(self, limit: int = 2000) -> list: if self._format == "json": latest = response.json()["feed"]["entry"] elif self._format == "umm_json": - latest = list( - 
DataCollection(collection, self._fields) - for collection in response.json()["items"] - ) + latest = list(DataCollection(collection, self._fields) for collection in response.json()["items"]) else: latest = [response.text] @@ -258,9 +255,7 @@ def get(self, limit: int = 2000) -> list: return results - def temporal( - self, date_from: str, date_to: str, exclude_boundary: bool = False - ) -> Type[CollectionQuery]: + def temporal(self, date_from: str, date_to: str, exclude_boundary: bool = False) -> Type[CollectionQuery]: """Filter by an open or closed date range. Dates can be provided as datetime objects or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls. to this method before calling execute(). @@ -424,9 +419,7 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: raise TypeError("cloud_hosted must be of type bool") if "short_name" in self.params: - provider = find_provider_by_shortname( - self.params["short_name"], cloud_hosted - ) + provider = find_provider_by_shortname(self.params["short_name"], cloud_hosted) if provider is not None: self.params["provider"] = provider return self @@ -485,9 +478,7 @@ def platform(self, platform: str = "") -> Type[GranuleQuery]: super().platform(platform) return self - def cloud_cover( - self, min_cover: int = 0, max_cover: int = 100 - ) -> Type[GranuleQuery]: + def cloud_cover(self, min_cover: int = 0, max_cover: int = 100) -> Type[GranuleQuery]: """Filter by the percentage of cloud cover present in the granule. Parameters: @@ -569,19 +560,14 @@ def get(self, limit: int = 2000) -> list: json_response = response.json()["items"] if len(json_response) > 0: if "CMR-Search-After" in response.headers: - headers["CMR-Search-After"] = response.headers[ - "CMR-Search-After" - ] + headers["CMR-Search-After"] = response.headers["CMR-Search-After"] else: headers = {} if self._is_cloud_hosted(json_response[0]): cloud = True else: cloud = False - latest = list( - DataGranule(granule, cloud_hosted=cloud) - for granule in response.json()["items"] - ) + latest = list(DataGranule(granule, cloud_hosted=cloud) for granule in response.json()["items"]) else: latest = [] else: @@ -680,9 +666,7 @@ def bounding_box( upper_right_lon: upper right longitude of the box upper_right_lat: upper right latitude of the box """ - super().bounding_box( - lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat - ) + super().bounding_box(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat) return self def line(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: @@ -720,7 +704,5 @@ def doi(self, doi: str) -> Type[GranuleQuery]: concept_id = collection[0].concept_id() self.params["concept_id"] = concept_id else: - print( - f"earthaccess couldn't find any associated collections with the DOI: {doi}" - ) + print(f"earthaccess couldn't find any associated collections with the DOI: {doi}") return self diff --git a/earthaccess/store.py b/earthaccess/store.py index 68fec997..4bf358ec 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -10,13 +10,14 @@ from typing import Any, Dict, List, Optional, Union from uuid import uuid4 -import earthaccess import fsspec import requests import s3fs from multimethod import multimethod as singledispatchmethod from pqdm.threads import pqdm +import earthaccess + from .auth import Auth from .daac import DAAC_TEST_URLS, find_provider from .results import DataGranule @@ -64,9 +65,7 @@ def multi_thread_open(data: tuple) -> EarthAccessFile: return fileset -def 
make_instance( - cls: Any, granule: DataGranule, auth: Auth, data: Any -) -> EarthAccessFile: +def make_instance(cls: Any, granule: DataGranule, auth: Auth, data: Any) -> EarthAccessFile: # Attempt to re-authenticate if not earthaccess.__auth__.authenticated: earthaccess.__auth__ = auth @@ -139,18 +138,14 @@ def _am_i_in_aws(self) -> bool: session = self.auth.get_session() try: # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html - resp = session.get( - "http://169.254.169.254/latest/meta-data/public-ipv4", timeout=1 - ) + resp = session.get("http://169.254.169.254/latest/meta-data/public-ipv4", timeout=1) except Exception: return False if resp.status_code == 200: return True return False - def set_requests_session( - self, url: str, method: str = "get", bearer_token: bool = False - ) -> None: + def set_requests_session(self, url: str, method: str = "get", bearer_token: bool = False) -> None: """Sets up a `requests` session with bearer tokens that are used by CMR. Mainly used to get the authentication cookies from different DAACs and URS This HTTPS session can be used to download granules if we want to use a direct, lower level API @@ -170,9 +165,7 @@ def set_requests_session( if resp.status_code in [400, 401, 403]: new_session = requests.Session() - resp_req = new_session.request( - method, url, allow_redirects=True, cookies=self._requests_cookies - ) + resp_req = new_session.request(method, url, allow_redirects=True, cookies=self._requests_cookies) if resp_req.status_code in [400, 401, 403]: resp.raise_for_status() else: @@ -203,8 +196,7 @@ def get_s3fs_session( if self.auth is not None: if not any([concept_id, daac, provider, endpoint]): raise ValueError( - "At least one of the concept_id, daac, provider or endpoint" - "parameters must be specified. " + "At least one of the concept_id, daac, provider or endpoint" "parameters must be specified. 
" ) if endpoint is not None: s3_credentials = self.auth.get_s3_credentials(endpoint=endpoint) @@ -219,9 +211,7 @@ def get_s3fs_session( delta_minutes = now - self.initial_ts # TODO: test this mocking the time or use https://github.com/dbader/schedule # if we exceed 1 hour - if ( - self.s3_fs is None or round(delta_minutes.seconds / 60, 2) > 59 - ) and s3_credentials is not None: + if (self.s3_fs is None or round(delta_minutes.seconds / 60, 2) > 59) and s3_credentials is not None: self.s3_fs = s3fs.S3FileSystem( key=s3_credentials["accessKeyId"], secret=s3_credentials["secretAccessKey"], @@ -230,9 +220,7 @@ def get_s3fs_session( self.initial_ts = datetime.datetime.now() return deepcopy(self.s3_fs) else: - raise ValueError( - "A valid Earthdata login instance is required to retrieve S3 credentials" - ) + raise ValueError("A valid Earthdata login instance is required to retrieve S3 credentials") @lru_cache def get_fsspec_session(self) -> fsspec.AbstractFileSystem: @@ -310,9 +298,7 @@ def _open_granules( print(f"Opening {len(granules)} granules, approx size: {total_size} GB") if self.auth is None: - raise ValueError( - "A valid Earthdata login instance is required to retrieve credentials" - ) + raise ValueError("A valid Earthdata login instance is required to retrieve credentials") if self.running_in_aws: if granules[0].cloud_hosted: @@ -330,11 +316,7 @@ def _open_granules( access_method = "on_prem" s3_fs = None - data_links = list( - chain.from_iterable( - granule.data_links(access=access_method) for granule in granules - ) - ) + data_links = list(chain.from_iterable(granule.data_links(access=access_method) for granule in granules)) if s3_fs is not None: try: @@ -355,11 +337,7 @@ def _open_granules( return fileset else: access_method = "on_prem" - data_links = list( - chain.from_iterable( - granule.data_links(access=access_method) for granule in granules - ) - ) + data_links = list(chain.from_iterable(granule.data_links(access=access_method) for granule in granules)) fileset = self._open_urls_https(data_links, granules, threads=threads) return fileset @@ -373,20 +351,14 @@ def _open_urls( fileset: List = [] data_links: List = [] - if isinstance(granules[0], str) and ( - granules[0].startswith("s3") or granules[0].startswith("http") - ): + if isinstance(granules[0], str) and (granules[0].startswith("s3") or granules[0].startswith("http")): # TODO: method to derive the DAAC from url? 
provider = provider data_links = granules else: - raise ValueError( - f"Schema for {granules[0]} is not recognized, must be an HTTP or S3 URL" - ) + raise ValueError(f"Schema for {granules[0]} is not recognized, must be an HTTP or S3 URL") if self.auth is None: - raise ValueError( - "A valid Earthdata login instance is required to retrieve S3 credentials" - ) + raise ValueError("A valid Earthdata login instance is required to retrieve S3 credentials") if self.running_in_aws and granules[0].startswith("s3"): if provider is not None: @@ -414,9 +386,7 @@ def _open_urls( ) else: if granules[0].startswith("s3"): - raise ValueError( - "We cannot open S3 links when we are not in-region, try using HTTPS links" - ) + raise ValueError("We cannot open S3 links when we are not in-region, try using HTTPS links") fileset = self._open_urls_https(data_links, granules, threads) return fileset @@ -530,19 +500,14 @@ def _get_granules( data_links = list( # we are not in region chain.from_iterable( - granule.data_links(access=access, in_region=self.running_in_aws) - for granule in granules + granule.data_links(access=access, in_region=self.running_in_aws) for granule in granules ) ) total_size = round(sum([granule.size() for granule in granules]) / 1024, 2) - print( - f" Getting {len(granules)} granules, approx download size: {total_size} GB" - ) + print(f" Getting {len(granules)} granules, approx download size: {total_size} GB") if access == "direct": if endpoint is not None: - print( - f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}" - ) + print(f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}") s3_fs = self.get_s3fs_session(endpoint=endpoint) else: print(f"Accessing cloud dataset using provider: {provider}") @@ -592,9 +557,7 @@ def _download_file(self, url: str, directory: str) -> str: print(f"File {local_filename} already downloaded") return local_path - def _download_onprem_granules( - self, urls: List[str], directory: str, threads: int = 8 - ) -> List[Any]: + def _download_onprem_granules(self, urls: List[str], directory: str, threads: int = 8) -> List[Any]: """ downloads a list of URLS into the data directory. 
:param urls: list of granule URLs from an on-prem collection @@ -605,9 +568,7 @@ def _download_onprem_granules( if urls is None: raise ValueError("The granules didn't provide a valid GET DATA link") if self.auth is None: - raise ValueError( - "We need to be logged into NASA EDL in order to download data granules" - ) + raise ValueError("We need to be logged into NASA EDL in order to download data granules") if not os.path.exists(directory): os.makedirs(directory) diff --git a/pyproject.toml b/pyproject.toml index 09788617..332d28b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,23 +97,11 @@ line-length = 120 src = ["earthaccess", "tests"] exclude = ["mypy-stubs", "stubs", "typeshed"] -[tool.ruff.format] -indent-style = "space" -quote-style = "single" - [tool.ruff.lint] extend-select = ["I"] -[tool.ruff.lint.pydocstyle] -convention = "google" - [tool.ruff.lint.isort] combine-as-imports = true -# unsupported isort arguments: -# --multi-line=3 -# --trailing-comma -# --force-grid-wrap=0 -# --line-width 88 [tool.bumpversion] diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index d1bfae1e..6d001f24 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -27,9 +27,7 @@ def activate_netrc(): password = os.environ["EARTHDATA_PASSWORD"] with open(NETRC_PATH, "w") as f: - f.write( - f"machine urs.earthdata.nasa.gov login {username} password {password}\n" - ) + f.write(f"machine urs.earthdata.nasa.gov login {username} password {password}\n") os.chmod(NETRC_PATH, 0o600) @@ -92,9 +90,7 @@ def test_auth_can_fetch_s3_credentials(): assertions.assertIsInstance(credentials, dict) assertions.assertTrue("accessKeyId" in credentials) except Exception as e: - print( - f"An error occured while trying to fetch S3 credentials for {daac['short-name']}: {e}" - ) + print(f"An error occured while trying to fetch S3 credentials for {daac['short-name']}: {e}") @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) @@ -103,10 +99,7 @@ def test_get_s3_credentials_lowercase_location(location): earthaccess.login(strategy="environment") creds = earthaccess.get_s3_credentials(**location) assert creds - assert all( - creds[key] - for key in ["accessKeyId", "secretAccessKey", "sessionToken", "expiration"] - ) + assert all(creds[key] for key in ["accessKeyId", "secretAccessKey", "sessionToken", "expiration"]) @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py index 4c76dc23..833a2f6d 100644 --- a/tests/integration/test_cloud_download.py +++ b/tests/integration/test_cloud_download.py @@ -123,17 +123,12 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): assert type(granules) is list and len(granules) > 0 assert isinstance(granules[0], earthaccess.results.DataGranule) local_path = f"./tests/integration/data/{concept_id}" - granules_to_download, total_size_cmr = get_sample_granules( - granules, granules_sample_size, granules_max_size - ) + granules_to_download, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size) if len(granules_to_download) == 0: - logger.warning( - f"Skipping {concept_id}, granule size exceeds configured max size" - ) + logger.warning(f"Skipping {concept_id}, granule size exceeds configured max size") continue logger.info( - f"Testing {concept_id}, granules in collection: {total_granules}, " - f"download size(MB): {total_size_cmr}" + 
f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) # We are testing this method try: @@ -144,9 +139,7 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): path = Path(local_path) assert path.is_dir() # test that we downloaded the mb reported by CMR - total_mb_downloaded = round( - (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2) - ) + total_mb_downloaded = round((sum(file.stat().st_size for file in path.rglob("*")) / 1024**2)) # clean the directory shutil.rmtree(path) # test that we could download the data diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py index d9d8fdea..a54bcf67 100644 --- a/tests/integration/test_cloud_open.py +++ b/tests/integration/test_cloud_open.py @@ -133,17 +133,12 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") continue - granules_to_open, total_size_cmr = get_sample_granules( - granules, granules_sample_size, granules_max_size - ) + granules_to_open, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size) if len(granules_to_open) == 0: - logger.debug( - f"Skipping {concept_id}, granule size exceeds configured max size" - ) + logger.debug(f"Skipping {concept_id}, granule size exceeds configured max size") continue logger.info( - f"Testing {concept_id}, granules in collection: {total_granules}, " - f"download size(MB): {total_size_cmr}" + f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) # We are testing this method diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py index d54404c8..153028d5 100644 --- a/tests/integration/test_onprem_download.py +++ b/tests/integration/test_onprem_download.py @@ -127,17 +127,12 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") continue local_path = f"./tests/integration/data/{concept_id}" - granules_to_download, total_size_cmr = get_sample_granules( - granules, granules_sample_size, granules_max_size - ) + granules_to_download, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size) if len(granules_to_download) == 0: - logger.debug( - f"Skipping {concept_id}, granule size exceeds configured max size" - ) + logger.debug(f"Skipping {concept_id}, granule size exceeds configured max size") continue logger.info( - f"Testing {concept_id}, granules in collection: {total_granules}, " - f"download size(MB): {total_size_cmr}" + f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) # We are testing this method downloaded_results = store.get(granules_to_download, local_path=local_path) @@ -148,9 +143,7 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): path = Path(local_path) assertions.assertTrue(path.is_dir()) # test that we downloaded the mb reported by CMR - total_mb_downloaded = round( - (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2 - ) + total_mb_downloaded = round((sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2) # clean the directory shutil.rmtree(path) diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py index 02a2c60a..7cc852dc 100644 --- 
a/tests/integration/test_onprem_open.py +++ b/tests/integration/test_onprem_open.py @@ -125,17 +125,12 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") continue - granules_to_open, total_size_cmr = get_sample_granules( - granules, granules_sample_size, granules_max_size - ) + granules_to_open, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size) if len(granules_to_open) == 0: - logger.debug( - f"Skipping {concept_id}, granule size exceeds configured max size" - ) + logger.debug(f"Skipping {concept_id}, granule size exceeds configured max size") continue logger.info( - f"Testing {concept_id}, granules in collection: {total_granules}, " - f"download size(MB): {total_size_cmr}" + f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) # We are testing this method diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 510278bb..c497cb01 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -47,9 +47,7 @@ def test_auth_gets_proper_credentials(self, user_input, user_password) -> bool: @responses.activate @mock.patch("getpass.getpass") @mock.patch("builtins.input") - def test_auth_can_create_proper_credentials( - self, user_input, user_password - ) -> bool: + def test_auth_can_create_proper_credentials(self, user_input, user_password) -> bool: user_input.return_value = "user" user_password.return_value = "password" json_response = {"access_token": "EDL-token-1", "expiration_date": "12/15/2021"} From 7c2dc505262b15417f1b8b2ec8fd07d41bfa5e03 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 29 Nov 2023 21:52:29 -0900 Subject: [PATCH 03/11] manual fixes for issues raised by ruff check --- earthaccess/store.py | 2 +- tests/integration/test_cloud_download.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/earthaccess/store.py b/earthaccess/store.py index 4bf358ec..38c4819b 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -52,7 +52,7 @@ def _open_files( ) -> List[fsspec.AbstractFileSystem]: def multi_thread_open(data: tuple) -> EarthAccessFile: urls, granule = data - if type(granule) is not str: + if not isinstance(granule, str): if len(granule.data_links()) > 1: print( "Warning: This collection contains more than one file per granule. 
" diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py index 833a2f6d..19ff75f2 100644 --- a/tests/integration/test_cloud_download.py +++ b/tests/integration/test_cloud_download.py @@ -120,7 +120,7 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assert type(granules) is list and len(granules) > 0 + assert isinstance(granules, list) and len(granules) > 0 assert isinstance(granules[0], earthaccess.results.DataGranule) local_path = f"./tests/integration/data/{concept_id}" granules_to_download, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size) From c451ff0ee337e2ab8734023863a7b5fce0ea8a2e Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 29 Nov 2023 21:57:55 -0900 Subject: [PATCH 04/11] fix fixes --- earthaccess/daac.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/earthaccess/daac.py b/earthaccess/daac.py index b17c02a7..f3a0e728 100644 --- a/earthaccess/daac.py +++ b/earthaccess/daac.py @@ -110,14 +110,11 @@ # Some testing urls behind EDL DAAC_TEST_URLS = [ - ("https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/" "JASON_CS_S6A_L2_ALT_LR_STD_OST_NRT_F/"), - ("https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/" "ATL03/005/2018/10/14/dummy.nc"), - ("https://n5eil01u.ecs.nsidc.org/DP7/ATLAS/ATL06.005/2018.10.14/" "ATL06_20181014045341_02380102_005_01.iso.xml"), - ("https://hydro1.gesdisc.eosdis.nasa.gov/data/GLDAS/GLDAS_NOAH10_M.2.0/1948/"), - ( - "https://e4ftl01.cr.usgs.gov//DP114/MOTA/MCD43A3.006/2000.02.24/" - "MCD43A3.A2000055.h15v07.006.2016101151720.hdf.xml" - ), + "https://archive.podaac.earthdata.nasa.gov/podaac-ops-cumulus-protected/JASON_CS_S6A_L2_ALT_LR_STD_OST_NRT_F/", + "https://data.nsidc.earthdatacloud.nasa.gov/nsidc-cumulus-prod-protected/ATLAS/ATL03/005/2018/10/14/dummy.nc", + "https://n5eil01u.ecs.nsidc.org/DP7/ATLAS/ATL06.005/2018.10.14/ATL06_20181014045341_02380102_005_01.iso.xml", + "https://hydro1.gesdisc.eosdis.nasa.gov/data/GLDAS/GLDAS_NOAH10_M.2.0/1948/", + "https://e4ftl01.cr.usgs.gov//DP114/MOTA/MCD43A3.006/2000.02.24/MCD43A3.A2000055.h15v07.006.2016101151720.hdf.xml", "https://daac.ornl.gov/daacdata/npp/grassland/NPP_BCN/data/bcn_cli.txt", ] From 7057173c9b4417164793797ec9d64af389e37f42 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 29 Nov 2023 22:07:41 -0900 Subject: [PATCH 05/11] poetry is weird yo --- poetry.lock | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index dd2916a4..137bfd15 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4490,4 +4490,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8,<4.0" -content-hash = "e426c0c43eaa11a5591d1eab3ee1c21b0cb6e833b12ec8352a46c8a3fdad5b9e" +content-hash = "30b1db85e7b715a7a94b140b10d84baaa206342ff03900a579205f76d598d8eb" diff --git a/pyproject.toml b/pyproject.toml index 332d28b7..9cbc226d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ mkdocs-jupyter = ">=0.19.0" pymdown-extensions = ">=9.2" pygments = ">=2.11.1" responses = ">=0.14" +ruff = "^0.1.6" types-requests = ">=0.1" types-setuptools = ">=0.1" ipywidgets = ">=7.7.0" @@ -69,8 +70,6 @@ dask = ">=2022.1" pyproj = ">=3.5.0" bump-my-version = 
">=0.10.0" -[tool.poetry.group.dev.dependencies] -ruff = "^0.1.6" [build-system] requires = ["poetry>=0.12"] From ccd14ace3eb05f7070d5d793df71b6c222c6d3af Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 29 Nov 2023 22:15:03 -0900 Subject: [PATCH 06/11] Add github action for ruff --- .github/workflows/static-analysis.yml | 28 +++++++++++++++++++++++++++ .gitignore | 1 + 2 files changed, 29 insertions(+) create mode 100644 .github/workflows/static-analysis.yml diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml new file mode 100644 index 00000000..315bd162 --- /dev/null +++ b/.github/workflows/static-analysis.yml @@ -0,0 +1,28 @@ +name: Lint and Format with Ruff + +on: push + +jobs: + check-with-ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.x + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + + + - name: Ruff linting check + run: | + ruff check --output-format=github earthaccess tests + + - name: Ruff format check + run: | + ruff format --diff earthaccess tests diff --git a/.gitignore b/.gitignore index 86a27a3f..ea6fc19e 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ build/ *.egg-info/ docs/tutorials/data tests/integration/data +.ruff_cache # OS X .DS_Store From 6fa4aee9c478f781fcf5a206399b209a34259dbc Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Wed, 29 Nov 2023 22:21:58 -0900 Subject: [PATCH 07/11] fix whitespace --- .github/workflows/static-analysis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml index 315bd162..599bd3c2 100644 --- a/.github/workflows/static-analysis.yml +++ b/.github/workflows/static-analysis.yml @@ -17,7 +17,6 @@ jobs: run: | python -m pip install --upgrade pip pip install ruff - - name: Ruff linting check run: | From 984560772a1442ac4efaf9ced78597240249f455 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 30 Nov 2023 08:01:30 -0900 Subject: [PATCH 08/11] Update pyproject.toml Co-authored-by: Matt Fisher --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9cbc226d..c10583b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,7 +92,7 @@ ignore_errors = true [tool.ruff] -line-length = 120 +line-length = 88 src = ["earthaccess", "tests"] exclude = ["mypy-stubs", "stubs", "typeshed"] From e5899f58cbcb2a1cc20dc0f19ad8d95739f3dae9 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 30 Nov 2023 08:11:03 -0900 Subject: [PATCH 09/11] Apply suggestions from code review Co-authored-by: Matt Fisher --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4287d6bc..915a8913 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,5 +15,5 @@ repos: rev: v0.1.6 hooks: - id: ruff - args: [ --fix ] + args: ["--fix", "--exit-non-zero-on-fix"] - id: ruff-format From 7b2d5d1865096e7147e9bd816eca3c66763a6335 Mon Sep 17 00:00:00 2001 From: Joseph H Kennedy Date: Thu, 30 Nov 2023 08:27:03 -0900 Subject: [PATCH 10/11] reformat for lw=88 --- earthaccess/__init__.py | 8 ++- earthaccess/api.py | 16 +++-- earthaccess/auth.py | 64 ++++++++++++++----- earthaccess/daac.py | 8 ++- earthaccess/formatters.py | 5 +- earthaccess/results.py | 30 +++++++-- earthaccess/search.py | 34 +++++++--- 
 earthaccess/store.py                      | 78 +++++++++++++++++------
 tests/integration/test_auth.py            | 13 +++-
 tests/integration/test_cloud_download.py  | 15 +++--
 tests/integration/test_cloud_open.py      | 11 +++-
 tests/integration/test_onprem_download.py | 15 +++--
 tests/integration/test_onprem_open.py     | 11 +++-
 tests/unit/test_auth.py                   |  4 +-
 14 files changed, 236 insertions(+), 76 deletions(-)

diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py
index 18750843..b74948da 100644
--- a/earthaccess/__init__.py
+++ b/earthaccess/__init__.py
@@ -64,14 +64,18 @@ def __getattr__(name):  # type: ignore
             try:
                 _auth.login(strategy=strategy)
             except Exception as e:
-                logger.debug(f"An error occurred during automatic authentication with {strategy=}: {str(e)}")
+                logger.debug(
+                    f"An error occurred during automatic authentication with {strategy=}: {str(e)}"
+                )
                 continue
             else:
                 if not _auth.authenticated:
                     continue
                 else:
                     _store = Store(_auth)
-                    logger.debug(f"Automatic authentication with {strategy=} was successful")
+                    logger.debug(
+                        f"Automatic authentication with {strategy=} was successful"
+                    )
                     break
         return _auth if name == "__auth__" else _store
     else:
diff --git a/earthaccess/api.py b/earthaccess/api.py
index e6a1d86c..8e518912 100644
--- a/earthaccess/api.py
+++ b/earthaccess/api.py
@@ -28,7 +28,9 @@ def _normalize_location(location: Union[str, None]) -> Union[str, None]:
     return location
 
 
-def search_datasets(count: int = -1, **kwargs: Any) -> List[earthaccess.results.DataCollection]:
+def search_datasets(
+    count: int = -1, **kwargs: Any
+) -> List[earthaccess.results.DataCollection]:
     """Search datasets using NASA's CMR
 
     [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html)
@@ -64,7 +66,9 @@ def search_datasets(
     ```
     """
     if not validate.valid_dataset_parameters(**kwargs):
-        print("Warning: a valid set of parameters is needed to search for datasets on CMR")
+        print(
+            "Warning: a valid set of parameters is needed to search for datasets on CMR"
+        )
         return []
     if earthaccess.__auth__.authenticated:
         query = DataCollections(auth=earthaccess.__auth__).parameters(**kwargs)
@@ -77,7 +81,9 @@ def search_datasets(
     return query.get_all()
 
 
-def search_data(count: int = -1, **kwargs: Any) -> List[earthaccess.results.DataGranule]:
+def search_data(
+    count: int = -1, **kwargs: Any
+) -> List[earthaccess.results.DataGranule]:
     """Search dataset granules using NASA's CMR.
 
     [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html)
@@ -352,5 +358,7 @@ def get_edl_token() -> str:
 def auth_environ() -> Dict[str, str]:
     auth = earthaccess.__auth__
     if not auth.authenticated:
-        raise RuntimeError("`auth_environ()` requires you to first authenticate with `earthaccess.login()`")
+        raise RuntimeError(
+            "`auth_environ()` requires you to first authenticate with `earthaccess.login()`"
+        )
     return {"EARTHDATA_USERNAME": auth.username, "EARTHDATA_PASSWORD": auth.password}
diff --git a/earthaccess/auth.py b/earthaccess/auth.py
index d25129d8..9a3b22cb 100644
--- a/earthaccess/auth.py
+++ b/earthaccess/auth.py
@@ -23,7 +23,9 @@ class SessionWithHeaderRedirection(requests.Session):
 
     AUTH_HOST = "urs.earthdata.nasa.gov"
 
-    def __init__(self, username: Optional[str] = None, password: Optional[str] = None) -> None:
+    def __init__(
+        self, username: Optional[str] = None, password: Optional[str] = None
+    ) -> None:
         super().__init__()
         if username and password:
             self.auth = (username, password)
@@ -93,7 +95,9 @@ def refresh_tokens(self) -> bool:
         This method renews the tokens to make sure we can query the collections allowed to our EDL user.
         """
         if len(self.tokens) == 0:
-            resp_tokens = self._generate_user_token(username=self.username, password=self.password)
+            resp_tokens = self._generate_user_token(
+                username=self.username, password=self.password
+            )
             if resp_tokens.ok:
                 self.token = resp_tokens.json()
                 self.tokens = [self.token]
@@ -104,7 +108,9 @@ def refresh_tokens(self) -> bool:
             else:
                 return False
         if len(self.tokens) == 1:
-            resp_tokens = self._generate_user_token(username=self.username, password=self.password)
+            resp_tokens = self._generate_user_token(
+                username=self.username, password=self.password
+            )
             if resp_tokens.ok:
                 self.token = resp_tokens.json()
                 self.tokens.extend(self.token)
@@ -118,7 +124,9 @@ def refresh_tokens(self) -> bool:
         if len(self.tokens) == 2:
             resp_revoked = self._revoke_user_token(self.token["access_token"])
             if resp_revoked:
-                resp_tokens = self._generate_user_token(username=self.username, password=self.password)
+                resp_tokens = self._generate_user_token(
+                    username=self.username, password=self.password
+                )
                 if resp_tokens.ok:
                     self.token = resp_tokens.json()
                     self.tokens[0] = self.token
@@ -153,22 +161,34 @@ def get_s3_credentials(
         if self.authenticated:
             session = SessionWithHeaderRedirection(self.username, self.password)
             if endpoint is None:
-                auth_url = self._get_cloud_auth_url(daac_shortname=daac, provider=provider)
+                auth_url = self._get_cloud_auth_url(
+                    daac_shortname=daac, provider=provider
+                )
             else:
                 auth_url = endpoint
             if auth_url.startswith("https://"):
                 cumulus_resp = session.get(auth_url, timeout=15, allow_redirects=True)
-                auth_resp = session.get(cumulus_resp.url, allow_redirects=True, timeout=15)
+                auth_resp = session.get(
+                    cumulus_resp.url, allow_redirects=True, timeout=15
+                )
                 if not (auth_resp.ok):  # type: ignore
                     # Let's try to authenticate with Bearer tokens
                     _session = self.get_session()
-                    cumulus_resp = _session.get(auth_url, timeout=15, allow_redirects=True)
-                    auth_resp = _session.get(cumulus_resp.url, allow_redirects=True, timeout=15)
+                    cumulus_resp = _session.get(
+                        auth_url, timeout=15, allow_redirects=True
+                    )
+                    auth_resp = _session.get(
+                        cumulus_resp.url, allow_redirects=True, timeout=15
+                    )
                     if not (auth_resp.ok):
-                        print(f"Authentication with Earthdata Login failed with:\n{auth_resp.text[0:1000]}")
+                        print(
+                            f"Authentication with Earthdata Login failed with:\n{auth_resp.text[0:1000]}"
+                        )
                         eula_url = "https://urs.earthdata.nasa.gov/users/earthaccess/unaccepted_eulas"
                         apps_url = "https://urs.earthdata.nasa.gov/application_search"
-                        print(f"Consider accepting the EULAs available at {eula_url} and applications at {apps_url}")
+                        print(
+                            f"Consider accepting the EULAs available at {eula_url} and applications at {apps_url}"
+                        )
                         return {}
 
                 return auth_resp.json()
@@ -194,7 +214,9 @@ class Session instance with Auth and bearer token headers
         if bearer_token and self.authenticated:
             # This will avoid the use of the netrc after we are logged in
             session.trust_env = False
-            session.headers.update({"Authorization": f'Bearer {self.token["access_token"]}'})
+            session.headers.update(
+                {"Authorization": f'Bearer {self.token["access_token"]}'}
+            )
         return session
 
     def get_user_profile(self) -> Dict[str, Any]:
@@ -221,7 +243,9 @@ def _netrc(self) -> bool:
         try:
             my_netrc = Netrc()
         except FileNotFoundError as err:
-            raise FileNotFoundError(f"No .netrc found in {os.path.expanduser('~')}") from err
+            raise FileNotFoundError(
+                f"No .netrc found in {os.path.expanduser('~')}"
+            ) from err
         except NetrcParseError as err:
             raise NetrcParseError("Unable to parse .netrc") from err
         if my_netrc["urs.earthdata.nasa.gov"] is not None:
@@ -247,12 +271,16 @@ def _environment(self) -> bool:
             )
         return authenticated
 
-    def _get_credentials(self, username: Optional[str], password: Optional[str]) -> bool:
+    def _get_credentials(
+        self, username: Optional[str], password: Optional[str]
+    ) -> bool:
         if username is not None and password is not None:
             token_resp = self._get_user_tokens(username, password)
 
             if not (token_resp.ok):  # type: ignore
-                print(f"Authentication with Earthdata Login failed with:\n{token_resp.text}")
+                print(
+                    f"Authentication with Earthdata Login failed with:\n{token_resp.text}"
+                )
                 return False
             logger.debug("You're now authenticated with NASA Earthdata Login")
             self.username = username
@@ -269,7 +297,9 @@ def _get_credentials(
                 self.token = self.tokens[0]
             elif len(self.tokens) > 0:
                 self.token = self.tokens[0]
-                logger.debug(f"Using token with expiration date: {self.token['expiration_date']}")
+                logger.debug(
+                    f"Using token with expiration date: {self.token['expiration_date']}"
+                )
             profile = self.get_user_profile()
             if "email_address" in profile:
                 self.user_profile = profile
@@ -330,7 +360,9 @@ def _persist_user_credentials(self, username: str, password: str) -> bool:
             my_netrc.save()
         return True
 
-    def _get_cloud_auth_url(self, daac_shortname: Optional[str] = "", provider: Optional[str] = "") -> str:
+    def _get_cloud_auth_url(
+        self, daac_shortname: Optional[str] = "", provider: Optional[str] = ""
+    ) -> str:
         for daac in DAACS:
             if (
                 daac_shortname == daac["short-name"]
diff --git a/earthaccess/daac.py b/earthaccess/daac.py
index f3a0e728..a15972c1 100644
--- a/earthaccess/daac.py
+++ b/earthaccess/daac.py
@@ -119,7 +119,9 @@
 ]
 
 
-def find_provider(daac_short_name: Optional[str] = None, cloud_hosted: Optional[bool] = None) -> Union[str, None]:
+def find_provider(
+    daac_short_name: Optional[str] = None, cloud_hosted: Optional[bool] = None
+) -> Union[str, None]:
     for daac in DAACS:
         if daac_short_name == daac["short-name"]:
             if cloud_hosted:
@@ -136,7 +138,9 @@ def find_provider(
 
 def find_provider_by_shortname(short_name: str, cloud_hosted: bool) -> Union[str, None]:
     base_url = "https://cmr.earthdata.nasa.gov/search/collections.umm_json?"
-    providers = requests.get(f"{base_url}&cloud_hosted={cloud_hosted}&short_name={short_name}").json()
+    providers = requests.get(
+        f"{base_url}&cloud_hosted={cloud_hosted}&short_name={short_name}"
+    ).json()
     if int(providers["hits"]) > 0:
         return providers["items"][0]["meta"]["provider-id"]
     else:
diff --git a/earthaccess/formatters.py b/earthaccess/formatters.py
index c35e9e3d..d37d4e22 100644
--- a/earthaccess/formatters.py
+++ b/earthaccess/formatters.py
@@ -8,7 +8,10 @@
 
 def _load_static_files() -> List[str]:
     """Load styles"""
-    return [pkg_resources.resource_string("earthaccess", fname).decode("utf8") for fname in STATIC_FILES]
+    return [
+        pkg_resources.resource_string("earthaccess", fname).decode("utf8")
+        for fname in STATIC_FILES
+    ]
 
 
 def _repr_collection_html() -> str:
diff --git a/earthaccess/results.py b/earthaccess/results.py
index 37e8582b..a466e1c3 100644
--- a/earthaccess/results.py
+++ b/earthaccess/results.py
@@ -28,9 +28,17 @@ def __init__(
         self.render_dict = self._filter_fields_(fields)
 
     def _filter_fields_(self, fields: List[str]) -> Dict[str, Any]:
-        filtered_dict = {"umm": dict((field, self["umm"][field]) for field in fields if field in self["umm"])}
+        filtered_dict = {
+            "umm": dict(
+                (field, self["umm"][field]) for field in fields if field in self["umm"]
+            )
+        }
         basic_dict = {
-            "meta": dict((field, self["meta"][field]) for field in self._basic_meta_fields_ if field in self["meta"])
+            "meta": dict(
+                (field, self["meta"][field])
+                for field in self._basic_meta_fields_
+                if field in self["meta"]
+            )
         }
         basic_dict.update(filtered_dict)
         return basic_dict
@@ -113,7 +121,11 @@ def data_type(self) -> str:
         If available, it returns the collection data type, i.e. HDF5, CSV etc
         """
         if "ArchiveAndDistributionInformation" in self["umm"]:
-            return str(self["umm"]["ArchiveAndDistributionInformation"]["FileDistributionInformation"])
+            return str(
+                self["umm"]["ArchiveAndDistributionInformation"][
+                    "FileDistributionInformation"
+                ]
+            )
         return ""
 
     def version(self) -> str:
@@ -162,7 +174,9 @@ def s3_bucket(self) -> Dict[str, Any]:
         return {}
 
     def __repr__(self) -> str:
-        return json.dumps(self.render_dict, sort_keys=False, indent=2, separators=(",", ": "))
+        return json.dumps(
+            self.render_dict, sort_keys=False, indent=2, separators=(",", ": ")
+        )
 
 
 class DataGranule(CustomDict):
@@ -264,11 +278,15 @@ def _derive_s3_link(self, links: List[str]) -> List[str]:
         for link in links:
             if link.startswith("s3"):
                 s3_links.append(link)
-            elif link.startswith("https://") and ("cumulus" in link or "protected" in link):
+            elif link.startswith("https://") and (
+                "cumulus" in link or "protected" in link
+            ):
                 s3_links.append(f's3://{links[0].split("nasa.gov/")[1]}')
         return s3_links
 
-    def data_links(self, access: Optional[str] = None, in_region: bool = False) -> List[str]:
+    def data_links(
+        self, access: Optional[str] = None, in_region: bool = False
+    ) -> List[str]:
         """Returns the data links form a granule
 
         Parameters:
diff --git a/earthaccess/search.py b/earthaccess/search.py
index 70fab447..0ed3b61d 100644
--- a/earthaccess/search.py
+++ b/earthaccess/search.py
@@ -243,7 +243,10 @@ def get(self, limit: int = 2000) -> list:
             if self._format == "json":
                 latest = response.json()["feed"]["entry"]
             elif self._format == "umm_json":
-                latest = list(DataCollection(collection, self._fields) for collection in response.json()["items"])
+                latest = list(
+                    DataCollection(collection, self._fields)
+                    for collection in response.json()["items"]
+                )
             else:
                 latest = [response.text]
 
@@ -255,7 +258,9 @@ def get(self, limit: int = 2000) -> list:
 
         return results
 
-    def temporal(self, date_from: str, date_to: str, exclude_boundary: bool = False) -> Type[CollectionQuery]:
+    def temporal(
+        self, date_from: str, date_to: str, exclude_boundary: bool = False
+    ) -> Type[CollectionQuery]:
         """Filter by an open or closed date range. Dates can be provided as datetime objects
         or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls.
         to this method before calling execute().
@@ -419,7 +424,9 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]:
             raise TypeError("cloud_hosted must be of type bool")
 
         if "short_name" in self.params:
-            provider = find_provider_by_shortname(self.params["short_name"], cloud_hosted)
+            provider = find_provider_by_shortname(
+                self.params["short_name"], cloud_hosted
+            )
             if provider is not None:
                 self.params["provider"] = provider
         return self
@@ -478,7 +485,9 @@ def platform(self, platform: str = "") -> Type[GranuleQuery]:
         super().platform(platform)
         return self
 
-    def cloud_cover(self, min_cover: int = 0, max_cover: int = 100) -> Type[GranuleQuery]:
+    def cloud_cover(
+        self, min_cover: int = 0, max_cover: int = 100
+    ) -> Type[GranuleQuery]:
         """Filter by the percentage of cloud cover present in the granule.
 
         Parameters:
@@ -560,14 +569,19 @@ def get(self, limit: int = 2000) -> list:
                 json_response = response.json()["items"]
                 if len(json_response) > 0:
                     if "CMR-Search-After" in response.headers:
-                        headers["CMR-Search-After"] = response.headers["CMR-Search-After"]
+                        headers["CMR-Search-After"] = response.headers[
+                            "CMR-Search-After"
+                        ]
                     else:
                         headers = {}
                     if self._is_cloud_hosted(json_response[0]):
                         cloud = True
                     else:
                         cloud = False
-                    latest = list(DataGranule(granule, cloud_hosted=cloud) for granule in response.json()["items"])
+                    latest = list(
+                        DataGranule(granule, cloud_hosted=cloud)
+                        for granule in response.json()["items"]
+                    )
                 else:
                     latest = []
             else:
@@ -666,7 +680,9 @@ def bounding_box(
             upper_right_lon: upper right longitude of the box
             upper_right_lat: upper right latitude of the box
         """
-        super().bounding_box(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
+        super().bounding_box(
+            lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat
+        )
         return self
 
     def line(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]:
@@ -704,5 +720,7 @@ def doi(self, doi: str) -> Type[GranuleQuery]:
             concept_id = collection[0].concept_id()
             self.params["concept_id"] = concept_id
         else:
-            print(f"earthaccess couldn't find any associated collections with the DOI: {doi}")
+            print(
+                f"earthaccess couldn't find any associated collections with the DOI: {doi}"
+            )
         return self
diff --git a/earthaccess/store.py b/earthaccess/store.py
index 38c4819b..5bd97d43 100644
--- a/earthaccess/store.py
+++ b/earthaccess/store.py
@@ -65,7 +65,9 @@ def multi_thread_open(data: tuple) -> EarthAccessFile:
     return fileset
 
 
-def make_instance(cls: Any, granule: DataGranule, auth: Auth, data: Any) -> EarthAccessFile:
+def make_instance(
+    cls: Any, granule: DataGranule, auth: Auth, data: Any
+) -> EarthAccessFile:
     # Attempt to re-authenticate
     if not earthaccess.__auth__.authenticated:
         earthaccess.__auth__ = auth
@@ -138,14 +140,18 @@ def _am_i_in_aws(self) -> bool:
         session = self.auth.get_session()
        try:
             # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
-            resp = session.get("http://169.254.169.254/latest/meta-data/public-ipv4", timeout=1)
+            resp = session.get(
+                "http://169.254.169.254/latest/meta-data/public-ipv4", timeout=1
+            )
         except Exception:
             return False
         if resp.status_code == 200:
             return True
         return False
 
-    def set_requests_session(self, url: str, method: str = "get", bearer_token: bool = False) -> None:
+    def set_requests_session(
+        self, url: str, method: str = "get", bearer_token: bool = False
+    ) -> None:
         """Sets up a `requests` session with bearer tokens that are used by CMR.
         Mainly used to get the authentication cookies from different DAACs and URS
         This HTTPS session can be used to download granules if we want to use a direct, lower level API
@@ -165,7 +171,9 @@ def set_requests_session(
 
         if resp.status_code in [400, 401, 403]:
             new_session = requests.Session()
-            resp_req = new_session.request(method, url, allow_redirects=True, cookies=self._requests_cookies)
+            resp_req = new_session.request(
+                method, url, allow_redirects=True, cookies=self._requests_cookies
+            )
             if resp_req.status_code in [400, 401, 403]:
                 resp.raise_for_status()
         else:
@@ -196,7 +204,8 @@ def get_s3fs_session(
         if self.auth is not None:
             if not any([concept_id, daac, provider, endpoint]):
                 raise ValueError(
-                    "At least one of the concept_id, daac, provider or endpoint" "parameters must be specified. "
+                    "At least one of the concept_id, daac, provider or endpoint"
+                    "parameters must be specified. "
                 )
             if endpoint is not None:
                 s3_credentials = self.auth.get_s3_credentials(endpoint=endpoint)
@@ -211,7 +220,9 @@ def get_s3fs_session(
             delta_minutes = now - self.initial_ts
             # TODO: test this mocking the time or use https://github.com/dbader/schedule
             # if we exceed 1 hour
-            if (self.s3_fs is None or round(delta_minutes.seconds / 60, 2) > 59) and s3_credentials is not None:
+            if (
+                self.s3_fs is None or round(delta_minutes.seconds / 60, 2) > 59
+            ) and s3_credentials is not None:
                 self.s3_fs = s3fs.S3FileSystem(
                     key=s3_credentials["accessKeyId"],
                     secret=s3_credentials["secretAccessKey"],
@@ -220,7 +231,9 @@ def get_s3fs_session(
                 )
                 self.initial_ts = datetime.datetime.now()
             return deepcopy(self.s3_fs)
         else:
-            raise ValueError("A valid Earthdata login instance is required to retrieve S3 credentials")
+            raise ValueError(
+                "A valid Earthdata login instance is required to retrieve S3 credentials"
+            )
 
     @lru_cache
     def get_fsspec_session(self) -> fsspec.AbstractFileSystem:
@@ -298,7 +311,9 @@ def _open_granules(
         print(f"Opening {len(granules)} granules, approx size: {total_size} GB")
 
         if self.auth is None:
-            raise ValueError("A valid Earthdata login instance is required to retrieve credentials")
+            raise ValueError(
+                "A valid Earthdata login instance is required to retrieve credentials"
+            )
 
         if self.running_in_aws:
             if granules[0].cloud_hosted:
@@ -316,7 +331,11 @@ def _open_granules(
                 access_method = "on_prem"
                 s3_fs = None
 
-        data_links = list(chain.from_iterable(granule.data_links(access=access_method) for granule in granules))
+        data_links = list(
+            chain.from_iterable(
+                granule.data_links(access=access_method) for granule in granules
+            )
+        )
 
         if s3_fs is not None:
             try:
@@ -337,7 +356,11 @@ def _open_granules(
                 return fileset
         else:
             access_method = "on_prem"
-            data_links = list(chain.from_iterable(granule.data_links(access=access_method) for granule in granules))
+            data_links = list(
+                chain.from_iterable(
+                    granule.data_links(access=access_method) for granule in granules
+                )
+            )
             fileset = self._open_urls_https(data_links, granules, threads=threads)
             return fileset
 
@@ -351,14 +374,20 @@ def _open_urls(
         fileset: List = []
         data_links: List = []
 
-        if isinstance(granules[0], str) and (granules[0].startswith("s3") or granules[0].startswith("http")):
+        if isinstance(granules[0], str) and (
+            granules[0].startswith("s3") or granules[0].startswith("http")
+        ):
             # TODO: method to derive the DAAC from url?
             provider = provider
             data_links = granules
         else:
-            raise ValueError(f"Schema for {granules[0]} is not recognized, must be an HTTP or S3 URL")
+            raise ValueError(
+                f"Schema for {granules[0]} is not recognized, must be an HTTP or S3 URL"
+            )
         if self.auth is None:
-            raise ValueError("A valid Earthdata login instance is required to retrieve S3 credentials")
+            raise ValueError(
+                "A valid Earthdata login instance is required to retrieve S3 credentials"
+            )
 
         if self.running_in_aws and granules[0].startswith("s3"):
             if provider is not None:
@@ -386,7 +415,9 @@ def _open_urls(
             )
         else:
             if granules[0].startswith("s3"):
-                raise ValueError("We cannot open S3 links when we are not in-region, try using HTTPS links")
+                raise ValueError(
+                    "We cannot open S3 links when we are not in-region, try using HTTPS links"
+                )
             fileset = self._open_urls_https(data_links, granules, threads)
         return fileset
 
@@ -500,14 +531,19 @@ def _get_granules(
         data_links = list(
             # we are not in region
             chain.from_iterable(
-                granule.data_links(access=access, in_region=self.running_in_aws) for granule in granules
+                granule.data_links(access=access, in_region=self.running_in_aws)
+                for granule in granules
             )
         )
         total_size = round(sum([granule.size() for granule in granules]) / 1024, 2)
-        print(f" Getting {len(granules)} granules, approx download size: {total_size} GB")
+        print(
+            f" Getting {len(granules)} granules, approx download size: {total_size} GB"
+        )
         if access == "direct":
             if endpoint is not None:
-                print(f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}")
+                print(
+                    f"Accessing cloud dataset using dataset endpoint credentials: {endpoint}"
+                )
                 s3_fs = self.get_s3fs_session(endpoint=endpoint)
             else:
                 print(f"Accessing cloud dataset using provider: {provider}")
@@ -557,7 +593,9 @@ def _download_file(self, url: str, directory: str) -> str:
             print(f"File {local_filename} already downloaded")
         return local_path
 
-    def _download_onprem_granules(self, urls: List[str], directory: str, threads: int = 8) -> List[Any]:
+    def _download_onprem_granules(
+        self, urls: List[str], directory: str, threads: int = 8
+    ) -> List[Any]:
         """
         downloads a list of URLS into the data directory.
         :param urls: list of granule URLs from an on-prem collection
@@ -568,7 +606,9 @@ def _download_onprem_granules(self, urls: List[str], directory: str, threads: in
         if urls is None:
             raise ValueError("The granules didn't provide a valid GET DATA link")
         if self.auth is None:
-            raise ValueError("We need to be logged into NASA EDL in order to download data granules")
+            raise ValueError(
+                "We need to be logged into NASA EDL in order to download data granules"
+            )
         if not os.path.exists(directory):
             os.makedirs(directory)
diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py
index 6d001f24..d1bfae1e 100644
--- a/tests/integration/test_auth.py
+++ b/tests/integration/test_auth.py
@@ -27,7 +27,9 @@ def activate_netrc():
     password = os.environ["EARTHDATA_PASSWORD"]
 
     with open(NETRC_PATH, "w") as f:
-        f.write(f"machine urs.earthdata.nasa.gov login {username} password {password}\n")
+        f.write(
+            f"machine urs.earthdata.nasa.gov login {username} password {password}\n"
+        )
         os.chmod(NETRC_PATH, 0o600)
@@ -90,7 +92,9 @@ def test_auth_can_fetch_s3_credentials():
             assertions.assertIsInstance(credentials, dict)
             assertions.assertTrue("accessKeyId" in credentials)
         except Exception as e:
-            print(f"An error occured while trying to fetch S3 credentials for {daac['short-name']}: {e}")
+            print(
+                f"An error occurred while trying to fetch S3 credentials for {daac['short-name']}: {e}"
+            )
 
 
 @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"}))
@@ -99,7 +103,10 @@ def test_get_s3_credentials_lowercase_location(location):
     earthaccess.login(strategy="environment")
     creds = earthaccess.get_s3_credentials(**location)
     assert creds
-    assert all(creds[key] for key in ["accessKeyId", "secretAccessKey", "sessionToken", "expiration"])
+    assert all(
+        creds[key]
+        for key in ["accessKeyId", "secretAccessKey", "sessionToken", "expiration"]
+    )
 
 
 @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"}))
diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py
index 19ff75f2..4ecc3137 100644
--- a/tests/integration/test_cloud_download.py
+++ b/tests/integration/test_cloud_download.py
@@ -123,12 +123,17 @@ def test_earthaccess_can_download_cloud_collection_granules(daac):
         assert isinstance(granules, list) and len(granules) > 0
         assert isinstance(granules[0], earthaccess.results.DataGranule)
         local_path = f"./tests/integration/data/{concept_id}"
-        granules_to_download, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size)
+        granules_to_download, total_size_cmr = get_sample_granules(
+            granules, granules_sample_size, granules_max_size
+        )
         if len(granules_to_download) == 0:
-            logger.warning(f"Skipping {concept_id}, granule size exceeds configured max size")
+            logger.warning(
+                f"Skipping {concept_id}, granule size exceeds configured max size"
+            )
             continue
         logger.info(
-            f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}"
+            f"Testing {concept_id}, granules in collection: {total_granules}, "
+            f"download size(MB): {total_size_cmr}"
         )
         # We are testing this method
         try:
@@ -139,7 +144,9 @@ def test_earthaccess_can_download_cloud_collection_granules(daac):
         path = Path(local_path)
         assert path.is_dir()
         # test that we downloaded the mb reported by CMR
-        total_mb_downloaded = round((sum(file.stat().st_size for file in path.rglob("*")) / 1024**2))
+        total_mb_downloaded = round(
+            (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2)
+        )
         # clean the directory
         shutil.rmtree(path)
         # test that we could download the data
diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py
index a54bcf67..d9d8fdea 100644
--- a/tests/integration/test_cloud_open.py
+++ b/tests/integration/test_cloud_open.py
@@ -133,12 +133,17 @@ def test_earthaccess_can_open_onprem_collection_granules(daac):
         if not supported_collection(data_links):
             logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}")
             continue
-        granules_to_open, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size)
+        granules_to_open, total_size_cmr = get_sample_granules(
+            granules, granules_sample_size, granules_max_size
+        )
         if len(granules_to_open) == 0:
-            logger.debug(f"Skipping {concept_id}, granule size exceeds configured max size")
+            logger.debug(
+                f"Skipping {concept_id}, granule size exceeds configured max size"
+            )
             continue
         logger.info(
-            f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}"
+            f"Testing {concept_id}, granules in collection: {total_granules}, "
+            f"download size(MB): {total_size_cmr}"
         )
 
         # We are testing this method
diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py
index 153028d5..d54404c8 100644
--- a/tests/integration/test_onprem_download.py
+++ b/tests/integration/test_onprem_download.py
@@ -127,12 +127,17 @@ def test_earthaccess_can_download_onprem_collection_granules(daac):
             logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}")
             continue
         local_path = f"./tests/integration/data/{concept_id}"
-        granules_to_download, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size)
+        granules_to_download, total_size_cmr = get_sample_granules(
+            granules, granules_sample_size, granules_max_size
+        )
         if len(granules_to_download) == 0:
-            logger.debug(f"Skipping {concept_id}, granule size exceeds configured max size")
+            logger.debug(
+                f"Skipping {concept_id}, granule size exceeds configured max size"
+            )
             continue
         logger.info(
-            f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}"
+            f"Testing {concept_id}, granules in collection: {total_granules}, "
+            f"download size(MB): {total_size_cmr}"
         )
         # We are testing this method
         downloaded_results = store.get(granules_to_download, local_path=local_path)
@@ -143,7 +148,9 @@ def test_earthaccess_can_download_onprem_collection_granules(daac):
         path = Path(local_path)
         assertions.assertTrue(path.is_dir())
         # test that we downloaded the mb reported by CMR
-        total_mb_downloaded = round((sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2)
+        total_mb_downloaded = round(
+            (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2
+        )
         # clean the directory
         shutil.rmtree(path)
 
diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py
index 7cc852dc..02a2c60a 100644
--- a/tests/integration/test_onprem_open.py
+++ b/tests/integration/test_onprem_open.py
@@ -125,12 +125,17 @@ def test_earthaccess_can_open_onprem_collection_granules(daac):
         if not supported_collection(data_links):
             logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}")
             continue
-        granules_to_open, total_size_cmr = get_sample_granules(granules, granules_sample_size, granules_max_size)
+        granules_to_open, total_size_cmr = get_sample_granules(
+            granules, granules_sample_size, granules_max_size
+        )
         if len(granules_to_open) == 0:
-            logger.debug(f"Skipping {concept_id}, granule size exceeds configured max size")
+            logger.debug(
+                f"Skipping {concept_id}, granule size exceeds configured max size"
+            )
             continue
         logger.info(
-            f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}"
+            f"Testing {concept_id}, granules in collection: {total_granules}, "
+            f"download size(MB): {total_size_cmr}"
        )
 
         # We are testing this method
diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py
index c497cb01..510278bb 100644
--- a/tests/unit/test_auth.py
+++ b/tests/unit/test_auth.py
@@ -47,7 +47,9 @@ def test_auth_gets_proper_credentials(self, user_input, user_password) -> bool:
     @responses.activate
     @mock.patch("getpass.getpass")
     @mock.patch("builtins.input")
-    def test_auth_can_create_proper_credentials(self, user_input, user_password) -> bool:
+    def test_auth_can_create_proper_credentials(
+        self, user_input, user_password
+    ) -> bool:
         user_input.return_value = "user"
         user_password.return_value = "password"
         json_response = {"access_token": "EDL-token-1", "expiration_date": "12/15/2021"}

From 7ed8385aaca2d63c15d9934ddc14e0ee7da4a2b1 Mon Sep 17 00:00:00 2001
From: Joseph H Kennedy
Date: Thu, 30 Nov 2023 08:45:32 -0900
Subject: [PATCH 11/11] ruff everything!

---
 .github/workflows/static-analysis.yml | 4 ++--
 scripts/format.sh                     | 4 ++--
 scripts/lint.sh                       | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml
index 599bd3c2..826982aa 100644
--- a/.github/workflows/static-analysis.yml
+++ b/.github/workflows/static-analysis.yml
@@ -20,8 +20,8 @@ jobs:
 
       - name: Ruff linting check
         run: |
-          ruff check --output-format=github earthaccess tests
+          ruff check --output-format=github .
 
       - name: Ruff format check
         run: |
-          ruff format --diff earthaccess tests
+          ruff format --diff .
diff --git a/scripts/format.sh b/scripts/format.sh
index aaeb10e5..631d46b5 100755
--- a/scripts/format.sh
+++ b/scripts/format.sh
@@ -1,5 +1,5 @@
 #!/bin/sh -e
 set -x
 
-ruff check --fix earthaccess tests
-ruff format earthaccess tests
+ruff check --fix .
+ruff format .
diff --git a/scripts/lint.sh b/scripts/lint.sh
index b8f7088f..3a528811 100755
--- a/scripts/lint.sh
+++ b/scripts/lint.sh
@@ -4,4 +4,4 @@ set -e
 set -x
 
 mypy earthaccess --disallow-untyped-defs
-ruff check earthaccess tests
+ruff check .