diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 9bb0b782..353e993f 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -46,6 +46,9 @@ jobs: run: poetry install - name: Build docs run: poetry run bash scripts/build-docs.sh + env: + EARTHDATA_USERNAME: ${{ secrets.EDL_USERNAME }} + EARTHDATA_PASSWORD: ${{ secrets.EDL_PASSWORD }} - name: Deploy if: | diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml new file mode 100644 index 00000000..8b6352c6 --- /dev/null +++ b/.github/workflows/integration-test.yml @@ -0,0 +1,55 @@ +name: Integration Tests + +on: + push: + branches: + - main + paths: + - earthaccess/** + - tests/** + - docs/** + - binder/** + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8, 3.9, '3.10', '3.11'] + fail-fast: false + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Get full python version + id: full-python-version + run: echo ::set-output name=version::$(python -c "import sys; print('-'.join(str(v) for v in sys.version_info))") + - name: Install poetry + run: | + curl -sSL https://install.python-poetry.org | python3 - + echo "$HOME/.poetry/bin" >> $GITHUB_PATH + - name: Configure poetry + run: poetry config virtualenvs.in-project true + - name: Set up cache + uses: actions/cache@v1 + id: cache + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }} + - name: Ensure cache is healthy + if: steps.cache.outputs.cache-hit == 'true' + run: poetry run pip --version >/dev/null 2>&1 || rm -rf .venv + - name: Install Dependencies + run: poetry install + - name: Test + env: + EARTHDATA_USERNAME: ${{ secrets.EDL_USERNAME }} + EARTHDATA_PASSWORD: ${{ secrets.EDL_PASSWORD }} + EARTHACCESS_TEST_USERNAME: ${{ 
secrets.EDL_USERNAME }} + EARTHACCESS_TEST_PASSWORD: ${{ secrets.EDL_PASSWORD }} + run: poetry run bash scripts/integration-test.sh + - name: Upload coverage + uses: codecov/codecov-action@v1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e1fafed5..67059457 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,4 +1,4 @@ -name: Test +name: Unit Tests on: push: @@ -46,11 +46,6 @@ jobs: - name: Install Dependencies run: poetry install - name: Test - env: - EARTHDATA_USERNAME: ${{ secrets.EDL_USERNAME }} - EARTHDATA_PASSWORD: ${{ secrets.EDL_PASSWORD }} - EARTHACCESS_TEST_USERNAME: ${{ secrets.EDL_USERNAME }} - EARTHACCESS_TEST_PASSWORD: ${{ secrets.EDL_PASSWORD }} run: poetry run bash scripts/test.sh - name: Upload coverage uses: codecov/codecov-action@v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index ee42c5d4..ac2b2aea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## Unreleased +* bug fixes: + * granule's size() returned zero + * Added exception handling for fsspec sessions, thanks to @jrbourbeau +* CI changes: + * integration tests are now only run when we push to main (after a merge) + * unit tests run for any branch and opened PR + ## [v0.5.2] 2023-04-21 * bug fixes: * Fixing #230 by removing Benedict as the dict handler, thanks to @psarka! diff --git a/README.md b/README.md index 0384ee48..ad46417f 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,16 @@ + + + + - +
@@ -94,12 +98,11 @@ If we are not sure or we don't know how to search for a particular dataset, we c ```python results = earthaccess.search_data( - short_name='ATL06', - version="005", + short_name='SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205', cloud_hosted=True, bounding_box=(-10, 20, 10, 50), - temporal=("2020-02", "2020-03"), - count=100 + temporal=("1999-02", "2019-03"), + count=10 ) @@ -140,7 +143,9 @@ This method works best if you are in the same Amazon Web Services (AWS) region a ```python import xarray as xr -ds = xr.open_mfdataset(earthaccess.open(results)) +files = earthaccess.open(results) + +ds = xr.open_mfdataset(files) ``` diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 120000 index 00000000..44fcc634 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1 @@ +../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/tutorials/authenticate.md b/docs/tutorials/authenticate.md index b280f457..e685941d 100644 --- a/docs/tutorials/authenticate.md +++ b/docs/tutorials/authenticate.md @@ -1,7 +1,13 @@ ## Authenticate with Earthdata Login +earthaccess can use environment variables, `.netrc` file or interactive input from a user to login with NASA EDL. +
+If a strategy is not specified, env vars will be used first, then netrc and finally user's input. 
+ ```py import earthaccess + +auth = earthaccess.login() ``` If you have a .netrc file with your Earthdata Login credentials diff --git a/docs/tutorials/edl.ipynb b/docs/tutorials/file-access.ipynb similarity index 99% rename from docs/tutorials/edl.ipynb rename to docs/tutorials/file-access.ipynb index a1946000..6a4fe498 100644 --- a/docs/tutorials/edl.ipynb +++ b/docs/tutorials/file-access.ipynb @@ -106,7 +106,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.14" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/docs/tutorials/demo.ipynb b/docs/tutorials/queries.ipynb similarity index 81% rename from docs/tutorials/demo.ipynb rename to docs/tutorials/queries.ipynb index 71a7e6ac..766a270e 100644 --- a/docs/tutorials/demo.ipynb +++ b/docs/tutorials/queries.ipynb @@ -8,11 +8,7 @@ }, "source": [ "\n", - "## Overview\n", - "\n", - "\n", - "# Introducing NASA earthaccess 🌍\n", - "\n", + "# Querying CMR using earthaccess\n", "\n", "\n", "#### TL;DR: [**earthaccess**](https://github.com/nsidc/earthaccess) is a Python package to search, preview and access NASA datasets (on-prem or in the cloud) with a few lines of code.\n", @@ -38,20 +34,91 @@ "\n", "Earthdata Login provides free and immediate access to thousands of EOSDIS data products covering all Earth science disciplines and topic areas for researchers, applied science users, application developers, and the general public.\n", "\n", - "Once we have our NASA EDL login credentials we can start accessing NASA data in a programmatic way.\n" + "Once we have our NASA EDL login credentials we can start accessing NASA data in a programmatic way.\n", + "\n", + "\n", + "## Querying CMR using earthaccess\n", + "\n", + "This short tutorial uses the `collection_query()` and `granule_query()` methods, these methods return a lower level Query Builder instance that can be used to query NASA's CMR.\n", + "For convenience the top level API also offers the 
`dataset_search(**kwargs)` and `data_search(**kwargs)` methods that map what these query builders do. \n", + "\n", + "For instance \n", + "\n", + "```python\n", + "query = earthaccess.granule_query().doi(\"some_doi\").temporal(\"1990-01-01\", \"2020-12-31\").cloud_hosted(True)\n", + "granules = query.get(10)\n", + "\n", + "```\n", + "\n", + "is equivalent to\n", + "\n", + "```python\n", + "granules = earthaccess.search_data(\n", + " doi=\"some_doi\",\n", + " temporal = (\"1990-01-01\",\"2020-12-31\"),\n", + " cloud_hosted=True,\n", + " limit=10\n", + ")\n", + "```" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "10f6c9ed-fe58-4e03-b29b-c6c447061f84", - "metadata": {}, - "outputs": [], + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.5.3'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import earthaccess\n", "earthaccess.__version__" ] }, + { + "cell_type": "code", + "execution_count": 2, + "id": "496c1e3e-5b1a-44f8-ae13-84c42ea814af", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EARTHDATA_USERNAME and EARTHDATA_PASSWORD are not set in the current environment, try setting them or use a different strategy (netrc, interactive)\n", + "You're now authenticated with NASA Earthdata Login\n", + "Using token with expiration date: 09/24/2023\n", + "Using .netrc file for EDL\n" + ] + } + ], + "source": [ + "auth = earthaccess.login()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39ba3dfb-a2b3-459a-ba51-dd6446c20872", + "metadata": {}, + "outputs": [], + "source": [ + "token yarn" + ] + }, { "cell_type": "markdown", "id": "95121ff7-5222-4778-a4de-25625e23884b", @@ -85,12 +152,14 @@ "cell_type": "code", "execution_count": null, "id": "caab3b4b-80cc-4790-9417-1dd12503aa55", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], 
"source": [ "# are we authenticated?\n", "\n", - "auth = earthaccess.login(strategy=\"netrc\")\n" + "auth = earthaccess.login()\n" ] }, { @@ -107,7 +176,9 @@ "cell_type": "code", "execution_count": null, "id": "8d5bf4c9-571b-4c93-af94-e66bd51cb584", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# The first step is to create a DataCollections query \n", @@ -139,7 +210,9 @@ "cell_type": "code", "execution_count": null, "id": "8cb5154c-f131-44ad-a68f-cf0fa21ce18f", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "collections[0][\"umm\"][\"ShortName\"]" @@ -171,11 +244,13 @@ "cell_type": "code", "execution_count": null, "id": "48cdcd74-dfe3-4b83-93f4-7378a0d981df", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# We can now search for collections using a pythonic API client for CMR.\n", - "Query = earthaccess.collection_query().daac(\"PODAAC\")\n", + "Query = earthaccess.collection_query().daac(\"ASF\")\n", "\n", "print(f'Collections found: {Query.hits()}')\n", "collections = Query.fields(['ShortName']).get(10)\n", @@ -187,11 +262,13 @@ "cell_type": "code", "execution_count": null, "id": "63792353-ab3e-4f0b-963d-7750e4b89113", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# What if we want cloud collections\n", - "Query = earthaccess.collection_query().daac(\"PODAAC\").cloud_hosted(True)\n", + "Query = earthaccess.collection_query().daac(\"ASF\").cloud_hosted(True)\n", "\n", "print(f'Collections found: {Query.hits()}')\n", "collections = Query.fields(['ShortName']).get(10)\n", @@ -203,7 +280,9 @@ "cell_type": "code", "execution_count": null, "id": "c4c5a34a-e808-4cc9-b34d-353d091a8242", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Printing the concept-id for the first 10 collections\n", @@ -230,17 +309,18 @@ "cell_type": "code", "execution_count": null, "id": "9364d737-5a79-4089-853f-76d2ad1c85a7", - 
"metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from pprint import pprint\n", "\n", "# We build our query\n", "\n", - "Query = earthaccess.granule_query().short_name('ATL06').version(\"005\").bounding_box(-134.7,58.9,-133.9,59.2)\n", + "Query = earthaccess.granule_query().short_name('HLSL30').bounding_box(-134.7,58.9,-133.9,59.2)\n", "# We get 5 metadata records\n", - "granules = Query.get(5)\n", - "granules" + "granules = Query.get(5)" ] }, { @@ -257,12 +337,13 @@ { "cell_type": "code", "execution_count": null, - "id": "66cd5f5c-a854-4a72-a831-33b8bd7ce9d2", - "metadata": {}, + "id": "0b56b119-ec9b-4922-911a-f37501597451", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "# printing 2 granules using display\n", - "[display(granule) for granule in granules]" + "[display(g) for g in granules]" ] }, { @@ -280,7 +361,9 @@ "cell_type": "code", "execution_count": null, "id": "00aa39ec-e2fb-49d1-bc54-8d8a2f0655aa", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "Query = earthaccess.granule_query().short_name(\"ATL06\").temporal(\"2020-03-01\", \"2020-03-30\").bounding_box(-134.7,58.9,-133.9,59.2).version(\"005\")\n", @@ -292,7 +375,9 @@ "cell_type": "code", "execution_count": null, "id": "8c493585-0d48-41bb-8815-6c83ad20ae80", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Now we can print some info about these granules using the built-in methods\n", @@ -313,6 +398,27 @@ "## On-prem access: DAAC hosted data 📡\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7b80520-5cae-45c5-9397-f990a1ba0f26", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "granules = []\n", + "\n", + "# we just grab 1 granule from May for each year of the dataset\n", + "for year in range(1999, 2019):\n", + " results = earthaccess.search_data(\n", + " doi = \"10.5067/SLREF-CDRV3\",\n", + " temporal=(f\"{year}-05\", f\"{year}-06\")\n", + " )\n", + " if 
len(results)>0:\n", + " granules.append(results[0])" + ] + }, { "cell_type": "markdown", "id": "4239e041-db87-40d1-b81a-12c26e9e0a47", @@ -325,15 +431,18 @@ "cell_type": "code", "execution_count": null, "id": "910e4b90-f0e0-42e5-a4e2-d5444089161f", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import earthaccess\n", "\n", "earthaccess.login()\n", "\n", - "Query = earthaccess.granule_query().short_name(\"SMAP_JPL_L3_SSS_CAP_8DAY-RUNNINGMEAN_V5\").bounding_box(-134.7,54.9,-100.9,69.2)\n", + "Query = earthaccess.granule_query().doi(\"10.5067/SLREF-CDRV3\").bounding_box(-134.7,54.9,-100.9,69.2)\n", "print(f\"Granule hits: {Query.hits()}\")\n", + "\n", "# getting more than 6,000 metadata records for demo purposes is going to slow us down a bit so let's get only a few\n", "granules = Query.get(10)\n", "# Does this granule belong to a cloud-based collection?\n", @@ -354,11 +463,13 @@ "cell_type": "code", "execution_count": null, "id": "434466a3-602b-4dff-a260-f7db6901514a", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%time\n", - "files = earthaccess.download(granules[0:4], \"./data/C1972955240-PODAAC/\")" + "files = earthaccess.download(granules[0:2], \"./data/C1972955240-PODAAC/\")" ] }, { @@ -381,7 +492,9 @@ "cell_type": "code", "execution_count": null, "id": "44403d51-0aa3-423c-8fff-e40d4969aa9d", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "\n", @@ -396,7 +509,9 @@ "cell_type": "code", "execution_count": null, "id": "5e59ca3e-b5d5-490f-b967-01d1c7b3fdf0", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "# Let's pretty print this\n", @@ -407,7 +522,9 @@ "cell_type": "code", "execution_count": null, "id": "b2a294f1-b1f9-4cd4-8751-dfc32feacec1", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%time\n", @@ -436,7 +553,9 @@ "cell_type": "code", "execution_count": null, "id": 
"aecdb529-5961-4fa6-b7e0-70bbd0d85041", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import warnings\n", @@ -451,8 +570,7 @@ "\n", "for granule in results:\n", " https_links.extend(granule.data_links(access=\"on_prem\"))\n", - " s3_links.extend(granule.data_links(access=\"direct\"))\n", - "s3_links" + " s3_links.extend(granule.data_links(access=\"direct\"))" ] }, { @@ -466,19 +584,17 @@ { "cell_type": "code", "execution_count": null, - "id": "e693af6a-a80e-4ca2-a034-8da194c18aaf", - "metadata": {}, + "id": "50e6f01e-86f0-4e29-869b-d6d437c8b130", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "%%time\n", + "fileset = earthaccess.open(results[0:3])\n", "\n", - "ds_L3 = xr.open_mfdataset(\n", - " earthaccess.open(results[0:3]),\n", - " combine='nested',\n", - " concat_dim='time',\n", - " coords='minimal',\n", - " )\n", - "ds_L3" + "# test that we can read data from the files\n", + "with fileset[0] as f:\n", + " print(f.read(100))" ] }, { @@ -548,7 +664,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.14" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/docs/tutorials/restricted-datasets.ipynb b/docs/tutorials/restricted-datasets.ipynb index 942e849b..437193be 100644 --- a/docs/tutorials/restricted-datasets.ipynb +++ b/docs/tutorials/restricted-datasets.ipynb @@ -309,7 +309,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/earthaccess/api.py b/earthaccess/api.py index e17b8229..ccb62200 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -1,10 +1,11 @@ from typing import Any, Dict, List, Optional, Type, Union -import earthaccess import requests import s3fs from fsspec import AbstractFileSystem +import earthaccess + from .auth import Auth from .search import CollectionQuery, DataCollections, DataGranules, GranuleQuery from .store import Store 
@@ -168,8 +169,7 @@ def download( except AttributeError as err: print(err) print("You must call earthaccess.login() before you can download data") - return None - + return [] return results diff --git a/earthaccess/formatters.py b/earthaccess/formatters.py index fbbaba86..d37d4e22 100644 --- a/earthaccess/formatters.py +++ b/earthaccess/formatters.py @@ -23,7 +23,7 @@ def _repr_granule_html(granule: Any) -> str: css_inline = f""" """ - style = "max-height: 140px;" + style = "max-height: 120px;" dataviz_img = "".join( [ f'' @@ -47,7 +47,7 @@ def _repr_granule_html(granule: Any) -> str:Data: {data_links}
Size: {granule_size} MB
-Spatial: {granule["umm"]["SpatialExtent"]}
+Cloud Hosted: {granule.cloud_hosted}