From f9e7800f9cca242dd7f3aa3a8a807eefe98522c6 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 16 Jul 2020 00:35:37 -0400 Subject: [PATCH 01/25] update default STAC_API_URL --- satsearch/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satsearch/search.py b/satsearch/search.py index 425d175..4ca175c 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -API_URL = os.getenv('STAC_API_URL', 'https://1tqdbvsut9.execute-api.us-west-2.amazonaws.com/v0').rstrip('/') + '/' +API_URL = os.getenv('STAC_API_URL', 'https://earth-search.aws.element84.com/v0').rstrip('/') + '/' class SatSearchError(Exception): From e21e05e844a0a62293e8443b8c2662a0e359b81b Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 16 Jul 2020 01:13:37 -0400 Subject: [PATCH 02/25] disable print-assets CLI option --- satsearch/cli.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/satsearch/cli.py b/satsearch/cli.py index cc1d27b..556c3a0 100644 --- a/satsearch/cli.py +++ b/satsearch/cli.py @@ -36,7 +36,8 @@ def __init__(self, *args, **kwargs): self.output_group.add_argument('--print-md', help=h, default=None, nargs='*', dest='printmd') h = 'Print calendar showing dates' self.output_group.add_argument('--print-cal', help=h, dest='printcal') - self.output_group.add_argument('--print-assets', help=h, dest='printassets', default=False, action='store_true') + #h = 'Print Item Asset definition from Collections' + #self.output_group.add_argument('--print-assets', help=h, dest='printassets', default=False, action='store_true') self.output_group.add_argument('--save', help='Save results as GeoJSON', default=None) def parse_args(self, *args, **kwargs): @@ -112,7 +113,7 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, n, {'eq': v}) -def main(items=None, printmd=None, printcal=None, printassets=None, +def main(items=None, printmd=None, printcal=None, #printassets=None, found=False, filename_template='${collection}/${date}/${id}', save=None, download=None, requester_pays=False, headers=None, **kwargs): """ Main function for performing a search """ @@ -139,8 +140,8 @@ def main(items=None, printmd=None, printcal=None, printassets=None, if printcal: print(items.calendar(printcal)) - if printassets: - print(items.assets_definition()) + #if printassets: + # print(items.assets_definition()) # save all metadata in JSON file if save is not None: From fe7bbbd641fcfe7bf99baff48f97accba788d353 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 16 Jul 2020 01:13:49 -0400 Subject: [PATCH 03/25] update README --- README.md | 101 ++++++++++++++++++++++++------------------------------ 1 file changed, 44 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index e0ff29c..44e5b09 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ The table below shows the corresponding versions between sat-search and STAC. Ad | -------- | ---- | | 0.1.x | 0.5.x - 0.6.x | | 0.2.x | 0.5.x - 0.7.x | -| 0.3.x | 0.9.x | +| 0.3.x | 0.9.x - 1.0.0-beta.2 | ## Using sat-search @@ -52,7 +52,7 @@ The sat-search CLI has an extensive online help that can be printed with the `-h $ sat-search -h usage: sat-search [-h] {search,load} ... -sat-search (v0.2.0) +sat-search (v0.3.0) positional arguments: {search,load} @@ -69,13 +69,11 @@ As can be seen there are two subcommands, `search` and `load`, each of which has ``` $ sat-search search -h -usage: sat-search search [-h] [--version] [-v VERBOSITY] - [--print-md [PRINTMD [PRINTMD ...]]] [--print-cal] - [--save SAVE] [-c COLLECTION] [--ids [IDS [IDS ...]]] - [--bbox BBOX BBOX BBOX BBOX] - [--intersects INTERSECTS] [--datetime DATETIME] - [--sort [SORT [SORT ...]]] [--found] - [-p [PROPERTY [PROPERTY ...]]] [--url URL] +usage: sat-search search [-h] [--version] [-v VERBOSITY] [--print-md [PRINTMD [PRINTMD ...]]] + [--print-cal PRINTCAL] [--save SAVE] [-c [COLLECTIONS [COLLECTIONS ...]]] + [--ids [IDS [IDS ...]]] [--bbox BBOX BBOX BBOX BBOX] [--intersects INTERSECTS] + [--datetime DATETIME] [-q [QUERY [QUERY ...]]] [--sortby [SORTBY [SORTBY ...]]] [--found] + [--url URL] [--headers HEADERS] optional arguments: -h, --help show this help message and exit @@ -86,50 +84,45 @@ optional arguments: output options: --print-md [PRINTMD [PRINTMD ...]] Print specified metadata for matched scenes (default: None) - --print-cal Print calendar showing dates (default: False) + --print-cal PRINTCAL Print calendar showing dates (default: None) --save SAVE Save results as GeoJSON (default: None) search options: - -c COLLECTION, --collection COLLECTION + -c [COLLECTIONS [COLLECTIONS ...]], --collections [COLLECTIONS [COLLECTIONS ...]] Name of collection (default: None) --ids [IDS [IDS ...]] - One or more scene IDs from provided collection - (ignores other parameters) (default: None) + One or more scene IDs from provided collection (ignores other parameters) (default: None) --bbox BBOX BBOX BBOX BBOX - Bounding box (min lon, min lat, max lon, max lat) - (default: None) + Bounding box (min lon, min lat, max lon, max lat) (default: None) --intersects INTERSECTS GeoJSON Feature (file or string) (default: None) - --datetime DATETIME Single date/time or begin and end date/time (e.g., - 2017-01-01/2017-02-15) (default: None) - --sort [SORT [SORT ...]] + --datetime DATETIME Single date/time or begin and end date/time (e.g., 2017-01-01/2017-02-15) (default: None) + -q [QUERY [QUERY ...]], --query [QUERY [QUERY ...]] + Query properties of form KEY=VALUE (<, >, <=, >=, = supported) (default: None) + --sortby [SORTBY [SORTBY ...]] Sort by fields (default: None) --found Only output how many Items found (default: False) - -p [PROPERTY [PROPERTY ...]], --property [PROPERTY [PROPERTY ...]] - Properties of form KEY=VALUE (<, >, <=, >=, = - supported) (default: None) - --url URL URL of the API (default: https://n34f767n91.execute- - api.us-east-1.amazonaws.com/prod) - --headers HEADERS - JSON Request Headers (file or string) (default: None) + --url URL URL of the API (default: https://earth-search.aws.element84.com/v0/) + --headers HEADERS Additional request headers (JSON file or string) (default: None) ``` **Search options** -- **collection** - Search only a specific collection. This is a shortcut, collection can also be provided as a property (e.g., `-p "collection=landsat-8-l1"`) +- **collections** - Search only a specific collections. This is a shortcut, collection can also be provided as a query (e.g., `-q "collection=landsat-8-l1"`) - **ids** - Fetch the Item for the provided IDs in the given collection (collection must be provided). All other search options will be ignored. - **intersects** - Provide a GeoJSON Feature string or the name of a GeoJSON file containing a single Feature that is a Polygon of an AOI to be searched. - **datetime** - Provide a single partial or full datetime (e.g., 2017, 2017-10, 2017-10-11, 2017-10-11T12:00), or two seperated by a slash that defines a range. e.g., 2017-01-01/2017-06-30 will search for scenes acquired in the first 6 months of 2017. -- **property** - Allows searching for any other scene properties by providing the pair as a string (e.g. `-p "landsat:row=42"`, `-p "eo:cloud_cover<10"`). Supported symbols include: =, <, >, >=, and <= -- **sort** - Sort by specific properties in ascending or descending order. A list of properties can be provided which will be used for sorting in that order of preference. By default a property will be sorted in descending order. To specify the order the property can be preceded with '<' (ascending) or '>' (descending). e.g., `--sort ">datetime" ", >=, and <= +- **sortby** - Sort by specific properties in ascending or descending order. A list of properties can be provided which will be used for sorting in that order of preference. By default a property will be sorted in descending order. To specify the order the property can be preceded with '<' (ascending) or '>' (descending). e.g., `--sort ">datetime" "}: Any STAC Item property may be used, e.g. "${eo:cloud_cover}", "${platform} + The actual filename will be this prefix followed by the asset key and an appropriate extension. For example, specifying `filename_template` as "./${eo:platform}/${date}/${id}" will save assets for each Item under directories of the platform and the date. Thus, a landsat-8 Item from June 20, 2018 will have it's assets saved in a directory './landsat-8/2017-06-20/'. A metadata asset with the key `MTL` would be saved as './landsat-8/2017-06-20/LC80090292018275LGN00_MTL.TIF'. The last component of the filename_template is taken as the filename. See example directory structure below. ``` landsat-8/ @@ -212,4 +199,4 @@ A shortcut to download all of the assets is available by providing "ALL" as the This [Jupyter notebook tutorial](tutorial-1.ipynb) covers all the main features of the library. ## About -sat-search was created by [Development Seed]() and is part of a collection of tools called [sat-utils](https://github.com/sat-utils). +sat-search is part of a collection of tools called [sat-utils](https://github.com/sat-utils). From 12eb5a5f90421c4d196c94938740ef65008309e2 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 16 Jul 2020 01:24:29 -0400 Subject: [PATCH 04/25] update notebook tutorial --- tutorial-1.ipynb | 169 ++++++++++++++++++++--------------------------- 1 file changed, 73 insertions(+), 96 deletions(-) diff --git a/tutorial-1.ipynb b/tutorial-1.ipynb index 8c09281..fe1360e 100644 --- a/tutorial-1.ipynb +++ b/tutorial-1.ipynb @@ -12,7 +12,7 @@ "\n", "Only the `search` module is in sat-search is used as a library, and it contains a single class, `Search`. The `parser` module is used for creating a CLI parser, and `main` contains the main function used in the CLI.\n", "\n", - "**API endpoint**: Sat-search uses an endpoint defined by the SATUTILS_API_URL environment variable. This defaults to https://sat-api.developmentseed.org/ but can point to any STAC compatible endpoint." + "**API endpoint**: Sat-search uses an endpoint defined by the STAC_API_URL environment variable. This defaults to https://earth-search.aws.element84.com/v0 but can point to any STAC compatible endpoint." ] }, { @@ -70,9 +70,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "bbox search: 6626 items\n", - "time search: 254719 items\n", - "cloud_cover search: 2241357 items\n" + "bbox search: 10305 items\n", + "time search: 474806 items\n", + "cloud_cover search: 7359438 items\n" ] } ], @@ -82,7 +82,7 @@ "search = Search(bbox=[-110, 39.5, -105, 40.5])\n", "print('bbox search: %s items' % search.found())\n", "\n", - "search = Search(time='2018-02-12T00:00:00Z/2018-03-18T12:31:12Z')\n", + "search = Search(datetime='2018-02-12T00:00:00Z/2018-03-18T12:31:12Z')\n", "print('time search: %s items' % search.found())\n", "\n", "search = Search(query={'eo:cloud_cover': {'lt': 10}})\n", @@ -107,14 +107,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "39 items\n" + "21 items\n" ] } ], "source": [ "search = Search(bbox=[-110, 39.5, -105, 40.5],\n", - " time='2018-02-12T00:00:00Z/2018-03-18T12:31:12Z',\n", - " query={'eo:cloud_cover': {'lt': 10}})\n", + " datetime='2018-02-12T00:00:00Z/2018-03-18T12:31:12Z',\n", + " query={'eo:cloud_cover': {'lt': 10}},\n", + " collections=['sentinel-s2-l2a'])\n", "print('%s items' % search.found())" ] }, @@ -136,7 +137,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "intersects search: 2657 items\n" + "intersects search: 4815 items\n" ] } ], @@ -179,22 +180,9 @@ "source": [ "## Alternate search syntax\n", "\n", - "This all works fine, except the syntax for creating queries is a bit verbose, so sat-search provides a factory function (Search.search) that uses an alternate syntax that is translated into proper STAC queries. This is the query syntax used by the sat-search CLI.\n", + "This all works fine, except the syntax for creating queries is a bit verbose, so sat-search allows an alternate syntax using simple strings of the property and equality symbols. \n", "\n", - "The keywords accepted by the Search.search function are slightly different:\n", - "\n", - "- bbox (same)\n", - "- intersects (same)\n", - "- time (same)\n", - "- datetime: this is an alias to 'time'\n", - "- ids: This is a list of IDs to fetch directly. The 'collection' keyword msut be provided in this case and all other keywords are ignored.\n", - "- collection: this can be provided as a property, but this is a shortcut since individual collections are frequently searched on their own.\n", - "- property: instead of `query`, and uses alternate syntax\n", - "- sort: uses alternate syntax\n", - "\n", - "The alternate syntax for `query` and `sort` uses simple strings and equality symbols.\n", - "\n", - "A typical query is shown below for eo:cloud_cover and collection, along with the alternate versions that use the `Search::search()` factory function." + "A typical query is shown below for eo:cloud_cover, along with the alternate versions that use the alternate syntax." ] }, { @@ -206,9 +194,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "679597 items found\n", - "679597 items found\n", - "679597 items found\n" + "7359438 items found\n", + "7359438 items found\n" ] } ], @@ -216,20 +203,13 @@ "query = {\n", " \"eo:cloud_cover\": {\n", " \"lt\": 10\n", - " },\n", - " \"collection\": {\n", - " \"eq\": \"landsat-8-l1\"\n", " }\n", "}\n", "\n", "search = Search(query=query)\n", "print('%s items found' % search.found())\n", "\n", - "search = Search.search(property=[\"eo:cloud_cover<10\", \"collection=landsat-8-l1\"])\n", - "print('%s items found' % search.found())\n", - "\n", - "# or use collection shortcut\n", - "search = Search.search(collection='landsat-8-l1', property=[\"eo:cloud_cover<10\"])\n", + "search = Search.search(query=[\"eo:cloud_cover<10\"])\n", "print('%s items found' % search.found())" ] }, @@ -251,16 +231,25 @@ "name": "stdout", "output_type": "stream", "text": [ - "6 items\n", - "6 items\n", + "15 items\n", + "15 items\n", "2 collections\n", - "[landsat-8-l1, sentinel-2-l1c]\n", - "LC80340332018034LGN00\n", - "LC80340322018034LGN00\n", - "S2A_12SWJ_20180202_0\n", - "S2A_12SXJ_20180202_0\n", - "S2A_12TXK_20180202_0\n", - "S2A_12TWK_20180202_0\n" + "[sentinel-s2-l2a, sentinel-s2-l1c]\n", + "S2B_12TXK_20180204_0_L1C\n", + "S2B_12TYK_20180204_0_L1C\n", + "S2B_12TWK_20180204_0_L1C\n", + "S2B_13TBE_20180204_0_L1C\n", + "S2B_13TBE_20180204_0_L2A\n", + "S2B_12TYK_20180204_0_L2A\n", + "S2B_12TWK_20180204_0_L2A\n", + "S2B_13TCE_20180204_0_L2A\n", + "S2B_12TXK_20180204_0_L2A\n", + "S2A_12SWJ_20180202_0_L1C\n", + "S2A_12SXJ_20180202_0_L1C\n", + "S2A_12SXJ_20180202_0_L2A\n", + "S2A_12SWJ_20180202_0_L2A\n", + "S2B_13SED_20180201_0_L1C\n", + "S2B_13SED_20180201_0_L2A\n" ] } ], @@ -293,14 +282,21 @@ "execution_count": 6, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "There are more items found (15) than the limit (2) provided.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "Items (2):\n", "date id \n", - "2018-02-03 LC80340332018034LGN00 \n", - "2018-02-03 LC80340322018034LGN00 \n", + "2018-02-04 S2B_12TXK_20180204_0_L1C \n", + "2018-02-04 S2B_12TYK_20180204_0_L1C \n", "\n" ] } @@ -330,8 +326,8 @@ "text": [ "Items (2):\n", "date id \n", - "2018-02-03 LC80340332018034LGN00 \n", - "2018-02-03 LC80340322018034LGN00 \n", + "2018-02-04 S2B_12TXK_20180204_0_L1C \n", + "2018-02-04 S2B_12TYK_20180204_0_L1C \n", "\n" ] } @@ -343,37 +339,47 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Items (2):\n", + "Items (6):\n", "date id \n", - "2018-02-08 LC80370332018039LGN00 \n", - "2018-02-03 LC80340332018034LGN00 \n", + "2018-02-07 S2B_12TWK_20180207_0_L2A \n", + "2018-02-07 S2B_12TXK_20180207_0_L2A \n", + "2018-02-07 S2B_12SWJ_20180207_0_L2A \n", + "2018-02-07 S2B_12SXJ_20180207_0_L2A \n", + "2018-02-02 S2A_12SXJ_20180202_0_L2A \n", + "2018-02-02 S2A_12SWJ_20180202_0_L2A \n", "\n", - "Items (2):\n", + "Items (6):\n", "date id eo:cloud_cover \n", - "2018-02-08 LC80370332018039LGN00 19 \n", - "2018-02-03 LC80340332018034LGN00 36 \n", + "2018-02-07 S2B_12TWK_20180207_0_L2A 2.45 \n", + "2018-02-07 S2B_12TXK_20180207_0_L2A 17.91 \n", + "2018-02-07 S2B_12SWJ_20180207_0_L2A 12.85 \n", + "2018-02-07 S2B_12SXJ_20180207_0_L2A 2.54 \n", + "2018-02-02 S2A_12SXJ_20180202_0_L2A 5.74 \n", + "2018-02-02 S2A_12SWJ_20180202_0_L2A 18.46 \n", "\n" ] } ], "source": [ - "from satstac import Items\n", + "from satstac import ItemCollection\n", "\n", "search = Search.search(bbox=[-110, 39.5, -105, 40.5],\n", " datetime='2018-02-01/2018-02-10',\n", - " property=[\"eo:cloud_cover<50\"],\n", - " collection='landsat-8-l1')\n", + " query=[\"eo:cloud_cover<25\"],\n", + " collections=['sentinel-s2-l2a'])\n", "items = search.items()\n", "print(items.summary())\n", "\n", "items.save('test.json')\n", - "items2 = Items.load('test.json')\n", + "items2 = ItemCollection.open('test.json')\n", "\n", "print(items2.summary(['date', 'id', 'eo:cloud_cover']))" ] @@ -387,58 +393,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "['downloads/2018-02-08/LC80370332018039LGN00_MTL.txt', 'downloads/2018-02-03/LC80340332018034LGN00_MTL.txt']\n" + "['downloads/2018-02-07/S2B_12TWK_20180207_0_L2A_metadata.xml', 'downloads/2018-02-07/S2B_12TXK_20180207_0_L2A_metadata.xml', 'downloads/2018-02-07/S2B_12SWJ_20180207_0_L2A_metadata.xml', 'downloads/2018-02-07/S2B_12SXJ_20180207_0_L2A_metadata.xml', 'downloads/2018-02-02/S2A_12SXJ_20180202_0_L2A_metadata.xml', 'downloads/2018-02-02/S2A_12SWJ_20180202_0_L2A_metadata.xml']\n" ] } ], "source": [ "# download a specific asset from all items and put in a directory by date in 'downloads'\n", - "filenames = items.download('MTL', path='downloads/${date}')\n", + "filenames = items.download('metadata', filename_template='downloads/${date}/${id}')\n", "print(filenames)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Fetching specific IDs\n", - "\n", - "A STAC API doesn't provide for searching by IDs because they can referenced directly within their collection (e.g., /collections/landsat-8-l1/items/LC80340332018034LGN00). However, the alternate search in sat-search allows for searching by IDs, as long as the collection is also provided.\n", - "\n", - "To simply get an `Items` object from a list of IDs, provide the ids and the collection name to the `Search::items_by_id()` function" - ] - }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Items (2):\n", - "date id \n", - "2018-02-03 LC80340332018034LGN00 \n", - "2018-02-03 LC80340322018034LGN00 \n", - "\n" - ] - } - ], - "source": [ - "ids = ['LC80340332018034LGN00', 'LC80340322018034LGN00']\n", - "search = Search.search(ids=ids, collection='landsat-8-l1')\n", - "items = search.items()\n", - "\n", - "print(items.summary())" - ] + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python3 sat-utils", + "display_name": "Python 3", "language": "python", - "name": "testenv" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -450,7 +427,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.1" + "version": "3.8.3" } }, "nbformat": 4, From b817a4db0da0b74d1e872fae4f98e6a045610034 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 16 Jul 2020 01:24:36 -0400 Subject: [PATCH 05/25] bump version --- satsearch/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satsearch/version.py b/satsearch/version.py index aa1d540..0404d81 100644 --- a/satsearch/version.py +++ b/satsearch/version.py @@ -1 +1 @@ -__version__ = '0.3.0-rc1' +__version__ = '0.3.0' From 56227bcfed5e7b44e4d8a56b602a5e126ff1ddbe Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 16 Jul 2020 01:28:54 -0400 Subject: [PATCH 06/25] update CHANGELOG --- CHANGELOG.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa5f2d7..cc4bc2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,11 +4,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [v0.3.0] - 2020-07-16 ## Changed -- Updated to work with [STAC API v0.9.0-rc1](https://github.com/radiantearth/stac-spec/blob/master/CHANGELOG.md#v090-rc1---2020-01-06) +- Updated to work with STAC API v0.9.0 and v1.0.0-beta.2 - `SATUTILS_API_URL` envvar changed to `STAC_API_URL` +- Refactored how envvar was set for URL to fix issues on some platforms +- When downloading, specify `filename_template` for location instead of both `datadir` and `filename`. + +## [v0.2.3] - 2019-06-25 + +### Changed +- Default SATUTILS_API_URL changed to account for domain name change ## [v0.2.2] - 2019-09-20 @@ -49,6 +56,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. Initial Release [Unreleased]: https://github.com/sat-utils/sat-search/compare/master...develop +[v0.3.0]: https://github.com/sat-utils/sat-search/compare/0.2.3...v0.3.0 +[v0.2.3]: https://github.com/sat-utils/sat-search/compare/0.2.2...v0.2.3 [v0.2.2]: https://github.com/sat-utils/sat-search/compare/0.2.1...v0.2.2 [v0.2.1]: https://github.com/sat-utils/sat-search/compare/0.2.0...v0.2.1 [v0.2.0]: https://github.com/sat-utils/sat-search/compare/0.1.0...v0.2.0 From 4f9fb189949eb18a202cfb33e6a809c6768634cc Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 16 Jul 2020 01:30:17 -0400 Subject: [PATCH 07/25] update tests to account for removed printassets param in cli --- test/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_cli.py b/test/test_cli.py index 5488c93..ce71289 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -30,7 +30,7 @@ def test_empty_parse_search_args(self): """ Parse empty arguments """ parser = self.get_test_parser() args = parser.parse_args(['search']) - self.assertEqual(len(args), 4) + self.assertEqual(len(args), 3) self.assertFalse(args['found']) def test_parse_args(self): @@ -39,7 +39,7 @@ def test_parse_args(self): args = 'search --datetime 2017-01-01 -q eo:cloud_cover<10 platform=sentinel-2a'.split(' ') args = parser.parse_args(args) - self.assertEqual(len(args), 6) + self.assertEqual(len(args), 5) self.assertEqual(args['datetime'], '2017-01-01') #assert(args['eo:cloud_cover'] == '0/20') #self.assertEqual(args['cloud_from'], 0) From 80a75c64a8e28c74f3aff1f2c916a29da0e43a40 Mon Sep 17 00:00:00 2001 From: matthewhanson Date: Tue, 18 Aug 2020 19:44:10 -0400 Subject: [PATCH 08/25] no default URL --- satsearch/search.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index 4ca175c..7ad8a3b 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -7,11 +7,8 @@ from satstac.utils import dict_merge from urllib.parse import urljoin - logger = logging.getLogger(__name__) -API_URL = os.getenv('STAC_API_URL', 'https://earth-search.aws.element84.com/v0').rstrip('/') + '/' - class SatSearchError(Exception): pass @@ -22,8 +19,10 @@ class Search(object): search_op_list = ['>=', '<=', '=', '>', '<'] search_op_to_stac_op = {'>=': 'gte', '<=': 'lte', '=': 'eq', '>': 'gt', '<': 'lt'} - def __init__(self, url=API_URL, **kwargs): + def __init__(self, url=os.getenv('STAC_API_URL', None), **kwargs): """ Initialize a Search object with parameters """ + if url is None: + raise SatSearchError("URL not provided, pass into Search or define STAC_API_URL environment variable") self.url = url.rstrip("/") + "/" self.kwargs = kwargs @@ -66,9 +65,9 @@ def found(self, headers=None): return results['context']['matched'] return 0 - @classmethod - def query(cls, url=urljoin(API_URL, 'search'), headers=None, **kwargs): + def query(self, headers=None, **kwargs): """ Get request """ + url = urljoin(self.url, 'search') logger.debug('Query URL: %s, Body: %s' % (url, json.dumps(kwargs))) response = requests.post(url, data=json.dumps(kwargs), headers=headers) # API error From 23ab49836f9d3baf42ffb9002b3216a2f5e1469b Mon Sep 17 00:00:00 2001 From: matthewhanson Date: Tue, 18 Aug 2020 19:44:20 -0400 Subject: [PATCH 09/25] remove printassets option --- satsearch/cli.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/satsearch/cli.py b/satsearch/cli.py index 556c3a0..a761b8f 100644 --- a/satsearch/cli.py +++ b/satsearch/cli.py @@ -27,7 +27,7 @@ def __init__(self, *args, **kwargs): self.download_group.add_argument('--filename_template', default='${collection}/${date}/${id}', help='Save assets with this filename pattern based on metadata keys') self.download_group.add_argument('--download', help='Download assets', default=None, nargs='*') - h = 'Acknowledge paying egress costs for downloads (if in request pays bucket)' + h = 'Acknowledge paying egress costs for downloads (if in requester pays bucket on AWS)' self.download_group.add_argument('--requester-pays', help=h, default=False, action='store_true', dest='requester_pays') self.output_parser = argparse.ArgumentParser(add_help=False) @@ -37,7 +37,6 @@ def __init__(self, *args, **kwargs): h = 'Print calendar showing dates' self.output_group.add_argument('--print-cal', help=h, dest='printcal') #h = 'Print Item Asset definition from Collections' - #self.output_group.add_argument('--print-assets', help=h, dest='printassets', default=False, action='store_true') self.output_group.add_argument('--save', help='Save results as GeoJSON', default=None) def parse_args(self, *args, **kwargs): @@ -113,7 +112,7 @@ def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, n, {'eq': v}) -def main(items=None, printmd=None, printcal=None, #printassets=None, +def main(items=None, printmd=None, printcal=None, found=False, filename_template='${collection}/${date}/${id}', save=None, download=None, requester_pays=False, headers=None, **kwargs): """ Main function for performing a search """ @@ -140,9 +139,6 @@ def main(items=None, printmd=None, printcal=None, #printassets=None, if printcal: print(items.calendar(printcal)) - #if printassets: - # print(items.assets_definition()) - # save all metadata in JSON file if save is not None: items.save(filename=save) From fcc7482fd58c203821aba7ff43d7ba64402f912e Mon Sep 17 00:00:00 2001 From: matthewhanson Date: Tue, 18 Aug 2020 19:44:26 -0400 Subject: [PATCH 10/25] bump version --- satsearch/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satsearch/version.py b/satsearch/version.py index 0404d81..c2997e0 100644 --- a/satsearch/version.py +++ b/satsearch/version.py @@ -1 +1 @@ -__version__ = '0.3.0' +__version__ = '0.3.0-rc2' From b7071bd390e5618c5698a7ba74e5cd7481a2af4e Mon Sep 17 00:00:00 2001 From: matthewhanson Date: Tue, 18 Aug 2020 23:48:58 -0400 Subject: [PATCH 11/25] remove old refs to API_URL and update README about STAC API endpoints --- README.md | 10 +++++++++- satsearch/__init__.py | 2 +- satsearch/cli.py | 4 +++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 44e5b09..bdb358b 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,15 @@ [![CircleCI](https://circleci.com/gh/sat-utils/sat-search.svg?style=svg&circle-token=a66861b5cbba7acd4abd7975f804ab061a365e1b)](https://circleci.com/gh/sat-utils/sat-search) -Sat-search is a Python 3 library and a command line tool for discovering and downloading publicly available satellite imagery using a conformant API such as [sat-api](https://github.com/sat-utils/sat-api). +Sat-search is a Python 3 library and a command line tool for discovering and downloading publicly available satellite imagery using STAC compliant API. + +## STAC APIs + +Starting with v0.3.0, sat-search does not have a default STAC endpoint. This can be passed as a parameter when using the library, or define the environment variable `STAC_API_URL`. Endpoints known to work are provided in this table: + +| Endpoint | Data | +| -------- | ---- | +| https://earth-search.aws.element84.com/v0 | Sentinel-2 | ## Installation diff --git a/satsearch/__init__.py b/satsearch/__init__.py index b625d01..b2ae79b 100644 --- a/satsearch/__init__.py +++ b/satsearch/__init__.py @@ -1,4 +1,4 @@ -from satsearch.search import Search, API_URL +from satsearch.search import Search from satsearch.version import __version__ import logging diff --git a/satsearch/cli.py b/satsearch/cli.py index a761b8f..0751714 100644 --- a/satsearch/cli.py +++ b/satsearch/cli.py @@ -5,10 +5,12 @@ import sys from .version import __version__ -from satsearch import Search, API_URL +from satsearch import Search from satstac import ItemCollection from satstac.utils import dict_merge +API_URL = os.getenv('STAC_API_URL', None) + class SatUtilsParser(argparse.ArgumentParser): From 6c733399abcd936824301d1cc194adc17121a8c1 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 20 Aug 2020 01:14:58 -0400 Subject: [PATCH 12/25] check for numberMatched if context not found, else return 0 --- satsearch/search.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index 7ad8a3b..c5c3920 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -53,7 +53,6 @@ def search(cls, headers=None, **kwargs): def found(self, headers=None): """ Small query to determine total number of hits """ kwargs = { - 'page': 1, 'limit': 0 } kwargs.update(self.kwargs) @@ -61,9 +60,12 @@ def found(self, headers=None): results = self.query(url=url, headers=headers, **kwargs) # TODO - check for status_code logger.debug(f"Found results: {json.dumps(results)}") + found = 0 if 'context' in results: - return results['context']['matched'] - return 0 + found = results['context']['matched'] + elif 'numberMatched' in results: + found = results['numberMatched'] + return found def query(self, headers=None, **kwargs): """ Get request """ From 20a6407c9f3a8125931c7d81baecb32b900f19d1 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 20 Aug 2020 01:52:03 -0400 Subject: [PATCH 13/25] update pagination through results --- satsearch/search.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index c5c3920..fb37be2 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -82,24 +82,30 @@ def collection(self, cid, headers=None): url = urljoin(self.url, 'collections/%s' % cid) return Collection(self.query(url=url, headers=headers)) - def items(self, limit=10000, headers=None): + def items(self, limit=10000, page_limit=500, headers=None): """ Return all of the Items and Collections for this search """ - _limit = 500 - - items = [] found = self.found(headers=headers) if found > limit: logger.warning('There are more items found (%s) than the limit (%s) provided.' % (found, limit)) - maxitems = min(found, limit) - kwargs = { - 'page': 1, - 'limit': min(_limit, maxitems) + + nextlink = { + 'url': urljoin(self.url, 'search'), + 'headers': headers, + 'body': self.kwargs, + 'merge': False } - kwargs.update(self.kwargs) - url = urljoin(self.url, 'search') - while len(items) < maxitems: - items += [Item(i) for i in self.query(url=url, headers=headers, **kwargs)['features']] - kwargs['page'] += 1 + + maxitems = min(found, limit) + items = [] + while nextlink and len(items) < maxitems: + _headers = nextlink['headers'] + _body = nextlink['body'] + _body.update({'limit': page_limit}) + if nextlink.get('merge', False): + _headers.update(headers) + _body.update(self.kwargs) + resp = self.query(url=nextlink['url'], headers=_headers, **_body) + items += [Item(i) for i in resp['features']] # retrieve collections collections = [] From 5b2aeb967c420c144f0dfb009e4b591a056700d7 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 20 Aug 2020 02:17:31 -0400 Subject: [PATCH 14/25] update pagination to use STAC spec with POST --- satsearch/search.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index fb37be2..e967c42 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -59,7 +59,7 @@ def found(self, headers=None): url = urljoin(self.url, 'search') results = self.query(url=url, headers=headers, **kwargs) # TODO - check for status_code - logger.debug(f"Found results: {json.dumps(results)}") + logger.debug(f"Found: {json.dumps(results)}") found = 0 if 'context' in results: found = results['context']['matched'] @@ -72,6 +72,7 @@ def query(self, headers=None, **kwargs): url = urljoin(self.url, 'search') logger.debug('Query URL: %s, Body: %s' % (url, json.dumps(kwargs))) response = requests.post(url, data=json.dumps(kwargs), headers=headers) + logger.debug(f"Response: {response.text}") # API error if response.status_code != 200: raise SatSearchError(response.text) @@ -89,7 +90,7 @@ def items(self, limit=10000, page_limit=500, headers=None): logger.warning('There are more items found (%s) than the limit (%s) provided.' % (found, limit)) nextlink = { - 'url': urljoin(self.url, 'search'), + 'href': urljoin(self.url, 'search'), 'headers': headers, 'body': self.kwargs, 'merge': False @@ -98,14 +99,16 @@ def items(self, limit=10000, page_limit=500, headers=None): maxitems = min(found, limit) items = [] while nextlink and len(items) < maxitems: - _headers = nextlink['headers'] - _body = nextlink['body'] + _headers = nextlink.get('headers', {}) + _body = nextlink.get('body', {}) _body.update({'limit': page_limit}) if nextlink.get('merge', False): _headers.update(headers) _body.update(self.kwargs) - resp = self.query(url=nextlink['url'], headers=_headers, **_body) + resp = self.query(url=nextlink['href'], headers=_headers, **_body) items += [Item(i) for i in resp['features']] + links = [l for l in resp['links'] if l['rel'] == 'next'] + nextlink = links[0] if len(links) == 1 else None # retrieve collections collections = [] From 11d87e16d270477630739da8ca450994bdcd7c42 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 20 Aug 2020 12:25:03 -0400 Subject: [PATCH 15/25] turn off check on maxitems and support GET method in next links --- satsearch/search.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index e967c42..1454a32 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -98,14 +98,17 @@ def items(self, limit=10000, page_limit=500, headers=None): maxitems = min(found, limit) items = [] - while nextlink and len(items) < maxitems: + while nextlink: #and len(items) < maxitems: _headers = nextlink.get('headers', {}) _body = nextlink.get('body', {}) _body.update({'limit': page_limit}) if nextlink.get('merge', False): _headers.update(headers) _body.update(self.kwargs) - resp = self.query(url=nextlink['href'], headers=_headers, **_body) + if nextlink.get('method', 'GET'): + resp = self.query(url=nextlink['href']) + else: + resp = self.query(url=nextlink['href'], headers=_headers, **_body) items += [Item(i) for i in resp['features']] links = [l for l in resp['links'] if l['rel'] == 'next'] nextlink = links[0] if len(links) == 1 else None @@ -119,4 +122,6 @@ def items(self, limit=10000, page_limit=500, headers=None): except: pass + import pdb; pdb.set_trace() + return ItemCollection(items, collections=collections) From 52f1f1842188fae9df276e19b20a6b5dffa93ffe Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Thu, 20 Aug 2020 14:31:18 -0400 Subject: [PATCH 16/25] remove set_trace --- satsearch/search.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index 1454a32..e129a92 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -122,6 +122,4 @@ def items(self, limit=10000, page_limit=500, headers=None): except: pass - import pdb; pdb.set_trace() - return ItemCollection(items, collections=collections) From 2b971da523c5e362abb78828d49369b1cc3695ae Mon Sep 17 00:00:00 2001 From: Trevor Skaggs Date: Fri, 21 Aug 2020 06:03:07 -0700 Subject: [PATCH 17/25] Fix page number and url bugs --- satsearch/search.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index e129a92..3498b92 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -67,11 +67,11 @@ def found(self, headers=None): found = results['numberMatched'] return found - def query(self, headers=None, **kwargs): + def query(self, url=None, headers=None, **kwargs): """ Get request """ - url = urljoin(self.url, 'search') + url = url or urljoin(self.url, 'search') logger.debug('Query URL: %s, Body: %s' % (url, json.dumps(kwargs))) - response = requests.post(url, data=json.dumps(kwargs), headers=headers) + response = requests.post(url, json=kwargs, headers=headers) logger.debug(f"Response: {response.text}") # API error if response.status_code != 200: @@ -85,7 +85,7 @@ def collection(self, cid, headers=None): def items(self, limit=10000, page_limit=500, headers=None): """ Return all of the Items and Collections for this search """ - found = self.found(headers=headers) + found = 0 #self.found(headers=headers) if found > limit: logger.warning('There are more items found (%s) than the limit (%s) provided.' % (found, limit)) @@ -96,18 +96,18 @@ def items(self, limit=10000, page_limit=500, headers=None): 'merge': False } - maxitems = min(found, limit) + maxitems = limit #min(found, limit) items = [] - while nextlink: #and len(items) < maxitems: - _headers = nextlink.get('headers', {}) - _body = nextlink.get('body', {}) - _body.update({'limit': page_limit}) - if nextlink.get('merge', False): - _headers.update(headers) - _body.update(self.kwargs) + while nextlink and len(items) < maxitems: if nextlink.get('method', 'GET'): - resp = self.query(url=nextlink['href']) + resp = self.query(url=nextlink['href'], headers=headers, **self.kwargs) else: + _headers = nextlink.get('headers', {}) + _body = nextlink.get('body', {}) + _body.update({'limit': page_limit}) + if nextlink.get('merge', False): + _headers.update(headers) + _body.update(self.kwargs) resp = self.query(url=nextlink['href'], headers=_headers, **_body) items += [Item(i) for i in resp['features']] links = [l for l in resp['links'] if l['rel'] == 'next'] @@ -122,4 +122,6 @@ def items(self, limit=10000, page_limit=500, headers=None): except: pass + logger.debug(f"Found: {len(items)}") + return ItemCollection(items, collections=collections) From 0f542b72090455de865217059a41729ab8f47e06 Mon Sep 17 00:00:00 2001 From: Trevor Skaggs Date: Fri, 21 Aug 2020 06:05:46 -0700 Subject: [PATCH 18/25] White space --- satsearch/search.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/satsearch/search.py b/satsearch/search.py index 3498b92..577ef0b 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -121,7 +121,5 @@ def items(self, limit=10000, page_limit=500, headers=None): #del collections[c]['links'] except: pass - logger.debug(f"Found: {len(items)}") - return ItemCollection(items, collections=collections) From ff7e7a1669794d3af87bafba10b533f6756f080f Mon Sep 17 00:00:00 2001 From: Trevor Skaggs Date: Fri, 21 Aug 2020 06:47:47 -0700 Subject: [PATCH 19/25] Add limit flag to CLI and backend --- satsearch/cli.py | 10 ++++++---- satsearch/search.py | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/satsearch/cli.py b/satsearch/cli.py index 0751714..4746c28 100644 --- a/satsearch/cli.py +++ b/satsearch/cli.py @@ -100,6 +100,7 @@ def newbie(cls, *args, **kwargs): parser.search_group.add_argument('--found', help=h, action='store_true', default=False) parser.search_group.add_argument('--url', help='URL of the API', default=API_URL) parser.search_group.add_argument('--headers', help='Additional request headers (JSON file or string)', default=None) + parser.search_group.add_argument('--limit', help='Limits the total number of itesm returned', default=None) parents.append(parser.download_parser) lparser = subparser.add_parser('load', help='Load items from previous search', parents=parents) @@ -122,10 +123,11 @@ def main(items=None, printmd=None, printcal=None, if items is None: ## if there are no items then perform a search search = Search.search(headers=headers, **kwargs) - if found: - num = search.found(headers=headers) - print('%s items found' % num) - return num + ## Commenting out found logic until functions correctly. + # if found: + # num = search.found(headers=headers) + # print('%s items found' % num) + # return num items = search.items(headers=headers) else: # otherwise, load a search from a file diff --git a/satsearch/search.py b/satsearch/search.py index 577ef0b..8d68b17 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -25,6 +25,7 @@ def __init__(self, url=os.getenv('STAC_API_URL', None), **kwargs): raise SatSearchError("URL not provided, pass into Search or define STAC_API_URL environment variable") self.url = url.rstrip("/") + "/" self.kwargs = kwargs + self.limit = int(self.kwargs['limit']) if 'limit' in self.kwargs else None @classmethod def search(cls, headers=None, **kwargs): @@ -86,6 +87,7 @@ def collection(self, cid, headers=None): def items(self, limit=10000, page_limit=500, headers=None): """ Return all of the Items and Collections for this search """ found = 0 #self.found(headers=headers) + limit = self.limit or limit if found > limit: logger.warning('There are more items found (%s) than the limit (%s) provided.' % (found, limit)) From 262443fbce6561438550fc28ba67fb2b789e8f34 Mon Sep 17 00:00:00 2001 From: Trevor Skaggs Date: Fri, 21 Aug 2020 06:50:36 -0700 Subject: [PATCH 20/25] Write gud --- satsearch/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satsearch/cli.py b/satsearch/cli.py index 4746c28..812fae2 100644 --- a/satsearch/cli.py +++ b/satsearch/cli.py @@ -100,7 +100,7 @@ def newbie(cls, *args, **kwargs): parser.search_group.add_argument('--found', help=h, action='store_true', default=False) parser.search_group.add_argument('--url', help='URL of the API', default=API_URL) parser.search_group.add_argument('--headers', help='Additional request headers (JSON file or string)', default=None) - parser.search_group.add_argument('--limit', help='Limits the total number of itesm returned', default=None) + parser.search_group.add_argument('--limit', help='Limits the total number of items returned', default=None) parents.append(parser.download_parser) lparser = subparser.add_parser('load', help='Load items from previous search', parents=parents) From 0bf0fe01e6e02e3fbe6d5a7a2b43f8221550911e Mon Sep 17 00:00:00 2001 From: Trevor Skaggs Date: Fri, 21 Aug 2020 06:57:15 -0700 Subject: [PATCH 21/25] Add docs update --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index bdb358b..0e41b34 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,7 @@ search options: --found Only output how many Items found (default: False) --url URL URL of the API (default: https://earth-search.aws.element84.com/v0/) --headers HEADERS Additional request headers (JSON file or string) (default: None) + --limit LIMIT Limits total number of Items returned (default: Unlimited) ``` **Search options** @@ -125,6 +126,7 @@ search options: - **found** - This will print out the total number of scenes found, then exit without fetching the actual items (i.e., the query is made with limit=0). - **url** - The URL endpoint of a STAC compliant API, this can also be set with the environment variable STAC_API_URL - **headers** - Additional request headers useful for specifying authentication parameters +- **limit** - Limits total number of Items returned **Output options** These options control what to do with the search results, multiple switches can be provided. From a2f7c321edb32fd20c379cb5202dfa1eba2259f2 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Fri, 21 Aug 2020 17:09:41 -0400 Subject: [PATCH 22/25] update found and paging --- .circleci/config.yml | 2 +- satsearch/cli.py | 8 ++++---- satsearch/search.py | 5 ++++- test/test_cli.py | 4 ++-- test/test_search.py | 12 +++++++----- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c8657f6..d4967dd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -44,7 +44,7 @@ jobs: pwd pip install -r requirements.txt pip install -r requirements-dev.txt - pytest --cov satsearch test/ + STAC_API_URL=https://earth-search.aws.element84.com/v0 pytest --cov satsearch test/ - save_cache: key: v1-dependencies-{{ checksum "requirements.txt"}} paths: diff --git a/satsearch/cli.py b/satsearch/cli.py index 812fae2..de10771 100644 --- a/satsearch/cli.py +++ b/satsearch/cli.py @@ -124,10 +124,10 @@ def main(items=None, printmd=None, printcal=None, ## if there are no items then perform a search search = Search.search(headers=headers, **kwargs) ## Commenting out found logic until functions correctly. - # if found: - # num = search.found(headers=headers) - # print('%s items found' % num) - # return num + if found: + num = search.found(headers=headers) + print('%s items found' % num) + return num items = search.items(headers=headers) else: # otherwise, load a search from a file diff --git a/satsearch/search.py b/satsearch/search.py index 8d68b17..bac6c10 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -58,6 +58,7 @@ def found(self, headers=None): } kwargs.update(self.kwargs) url = urljoin(self.url, 'search') + results = self.query(url=url, headers=headers, **kwargs) # TODO - check for status_code logger.debug(f"Found: {json.dumps(results)}") @@ -92,6 +93,7 @@ def items(self, limit=10000, page_limit=500, headers=None): logger.warning('There are more items found (%s) than the limit (%s) provided.' % (found, limit)) nextlink = { + 'method': 'POST', 'href': urljoin(self.url, 'search'), 'headers': headers, 'body': self.kwargs, @@ -101,12 +103,13 @@ def items(self, limit=10000, page_limit=500, headers=None): maxitems = limit #min(found, limit) items = [] while nextlink and len(items) < maxitems: - if nextlink.get('method', 'GET'): + if nextlink.get('method', 'GET') == 'GET': resp = self.query(url=nextlink['href'], headers=headers, **self.kwargs) else: _headers = nextlink.get('headers', {}) _body = nextlink.get('body', {}) _body.update({'limit': page_limit}) + if nextlink.get('merge', False): _headers.update(headers) _body.update(self.kwargs) diff --git a/test/test_cli.py b/test/test_cli.py index ce71289..19d854d 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -59,8 +59,8 @@ def test_main(self): def test_main_found(self): """ Run main function """ - found = main(datetime='2019-07-01', found=True) - self.assertEqual(found, 24737) + found = main(datetime='2020-01-01', found=True) + self.assertEqual(found, 17819) def test_main_load(self): items = main(items=os.path.join(testpath, 'scenes.geojson')) diff --git a/test/test_search.py b/test/test_search.py index c2a9baf..df3b165 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -4,7 +4,9 @@ import unittest from satstac import Item -from satsearch.search import SatSearchError, Search, API_URL +from satsearch.search import SatSearchError, Search + +API_URL = 'https://earth-search.aws.element84.com/v0' class Test(unittest.TestCase): @@ -21,7 +23,7 @@ def setUpClass(cls): def get_searches(self): """ Initialize and return search object """ - return [Search(datetime=r['properties']['datetime']) for r in self.results] + return [Search(datetime=r['properties']['datetime'], url=API_URL) for r in self.results] def test_search_init(self): """ Initialize a search object """ @@ -73,9 +75,9 @@ def test_get_ids_search(self): assert(search.found() == 4) assert(len(items) == 4) - def _test_query_bad_url(self): - with self.assertRaises(SatSearchError): - Search.query(url=os.path.join(API_URL, 'collections/nosuchcollection')) + #def _test_query_bad_url(self): + # with self.assertRaises(SatSearchError): + # Search.query(url=os.path.join(API_URL, 'collections/nosuchcollection')) def test_search_query_operator(self): expected = {'collections': ['sentinel-s2-l1c'], 'query': {'eo:cloud_cover': {'lte': '10'}, 'data_coverage': {'gt': '80'}}} From e680eb1fe344ab870cff078f514a759808aa66fd Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Fri, 21 Aug 2020 17:11:59 -0400 Subject: [PATCH 23/25] update version and CHANGELOG --- CHANGELOG.md | 6 +++--- satsearch/version.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc4bc2b..03ba674 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,13 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [v0.3.0] - 2020-07-16 +## [v0.3.0] - 2020-08-21 ## Changed - Updated to work with STAC API v0.9.0 and v1.0.0-beta.2 -- `SATUTILS_API_URL` envvar changed to `STAC_API_URL` -- Refactored how envvar was set for URL to fix issues on some platforms +- `SATUTILS_API_URL` envvar changed to `STAC_API_URL` and default value removed. Specify with envvar or pass into Search when using library - When downloading, specify `filename_template` for location instead of both `datadir` and `filename`. +- Update pagination to precisely follow STAC spec ## [v0.2.3] - 2019-06-25 diff --git a/satsearch/version.py b/satsearch/version.py index c2997e0..0404d81 100644 --- a/satsearch/version.py +++ b/satsearch/version.py @@ -1 +1 @@ -__version__ = '0.3.0-rc2' +__version__ = '0.3.0' From 1f2e33e3da747656a6b138d692734e030572e2fe Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Fri, 21 Aug 2020 17:19:36 -0400 Subject: [PATCH 24/25] update README and tutorial --- README.md | 41 ++++++++++++++++++++++++++--------------- tutorial-1.ipynb | 2 +- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 0e41b34..447b515 100644 --- a/README.md +++ b/README.md @@ -77,21 +77,27 @@ As can be seen there are two subcommands, `search` and `load`, each of which has ``` $ sat-search search -h -usage: sat-search search [-h] [--version] [-v VERBOSITY] [--print-md [PRINTMD [PRINTMD ...]]] - [--print-cal PRINTCAL] [--save SAVE] [-c [COLLECTIONS [COLLECTIONS ...]]] - [--ids [IDS [IDS ...]]] [--bbox BBOX BBOX BBOX BBOX] [--intersects INTERSECTS] - [--datetime DATETIME] [-q [QUERY [QUERY ...]]] [--sortby [SORTBY [SORTBY ...]]] [--found] - [--url URL] [--headers HEADERS] +usage: sat-search search [-h] [--version] [-v VERBOSITY] + [--print-md [PRINTMD [PRINTMD ...]]] + [--print-cal PRINTCAL] [--save SAVE] + [-c [COLLECTIONS [COLLECTIONS ...]]] + [--ids [IDS [IDS ...]]] [--bbox BBOX BBOX BBOX BBOX] + [--intersects INTERSECTS] [--datetime DATETIME] + [-q [QUERY [QUERY ...]]] + [--sortby [SORTBY [SORTBY ...]]] [--found] + [--url URL] [--headers HEADERS] [--limit LIMIT] optional arguments: -h, --help show this help message and exit --version Print version and exit -v VERBOSITY, --verbosity VERBOSITY - 0:quiet, 1:error, 2:warning, 3:info, 4:debug (default: 2) + 0:quiet, 1:error, 2:warning, 3:info, 4:debug (default: + 2) output options: --print-md [PRINTMD [PRINTMD ...]] - Print specified metadata for matched scenes (default: None) + Print specified metadata for matched scenes (default: + None) --print-cal PRINTCAL Print calendar showing dates (default: None) --save SAVE Save results as GeoJSON (default: None) @@ -99,21 +105,26 @@ search options: -c [COLLECTIONS [COLLECTIONS ...]], --collections [COLLECTIONS [COLLECTIONS ...]] Name of collection (default: None) --ids [IDS [IDS ...]] - One or more scene IDs from provided collection (ignores other parameters) (default: None) + One or more scene IDs from provided collection + (ignores other parameters) (default: None) --bbox BBOX BBOX BBOX BBOX - Bounding box (min lon, min lat, max lon, max lat) (default: None) + Bounding box (min lon, min lat, max lon, max lat) + (default: None) --intersects INTERSECTS GeoJSON Feature (file or string) (default: None) - --datetime DATETIME Single date/time or begin and end date/time (e.g., 2017-01-01/2017-02-15) (default: None) + --datetime DATETIME Single date/time or begin and end date/time (e.g., + 2017-01-01/2017-02-15) (default: None) -q [QUERY [QUERY ...]], --query [QUERY [QUERY ...]] - Query properties of form KEY=VALUE (<, >, <=, >=, = supported) (default: None) + Query properties of form KEY=VALUE (<, >, <=, >=, = + supported) (default: None) --sortby [SORTBY [SORTBY ...]] Sort by fields (default: None) --found Only output how many Items found (default: False) - --url URL URL of the API (default: https://earth-search.aws.element84.com/v0/) - --headers HEADERS Additional request headers (JSON file or string) (default: None) - --limit LIMIT Limits total number of Items returned (default: Unlimited) -``` + --url URL URL of the API (default: None) + --headers HEADERS Additional request headers (JSON file or string) + (default: None) + --limit LIMIT Limits the total number of items returned (default: + None) **Search options** diff --git a/tutorial-1.ipynb b/tutorial-1.ipynb index fe1360e..16f6914 100644 --- a/tutorial-1.ipynb +++ b/tutorial-1.ipynb @@ -12,7 +12,7 @@ "\n", "Only the `search` module is in sat-search is used as a library, and it contains a single class, `Search`. The `parser` module is used for creating a CLI parser, and `main` contains the main function used in the CLI.\n", "\n", - "**API endpoint**: Sat-search uses an endpoint defined by the STAC_API_URL environment variable. This defaults to https://earth-search.aws.element84.com/v0 but can point to any STAC compatible endpoint." + "**API endpoint**: Sat-search required an endpoint to be passed in or defined by the STAC_API_URL environment variable. This tutorial uses https://earth-search.aws.element84.com/v0 but any STAC endpoint can be used." ] }, { From 6d79e5cc89b1458ded424d057972061b2f89c48a Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Fri, 21 Aug 2020 17:25:45 -0400 Subject: [PATCH 25/25] remove some old code, update limit --- satsearch/cli.py | 1 - satsearch/search.py | 5 ++--- test/test_search.py | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/satsearch/cli.py b/satsearch/cli.py index de10771..18c88ac 100644 --- a/satsearch/cli.py +++ b/satsearch/cli.py @@ -38,7 +38,6 @@ def __init__(self, *args, **kwargs): self.output_group.add_argument('--print-md', help=h, default=None, nargs='*', dest='printmd') h = 'Print calendar showing dates' self.output_group.add_argument('--print-cal', help=h, dest='printcal') - #h = 'Print Item Asset definition from Collections' self.output_group.add_argument('--save', help='Save results as GeoJSON', default=None) def parse_args(self, *args, **kwargs): diff --git a/satsearch/search.py b/satsearch/search.py index bac6c10..203aa3f 100644 --- a/satsearch/search.py +++ b/satsearch/search.py @@ -87,7 +87,7 @@ def collection(self, cid, headers=None): def items(self, limit=10000, page_limit=500, headers=None): """ Return all of the Items and Collections for this search """ - found = 0 #self.found(headers=headers) + found = self.found(headers=headers) limit = self.limit or limit if found > limit: logger.warning('There are more items found (%s) than the limit (%s) provided.' % (found, limit)) @@ -100,9 +100,8 @@ def items(self, limit=10000, page_limit=500, headers=None): 'merge': False } - maxitems = limit #min(found, limit) items = [] - while nextlink and len(items) < maxitems: + while nextlink and len(items) < limit: if nextlink.get('method', 'GET') == 'GET': resp = self.query(url=nextlink['href'], headers=headers, **self.kwargs) else: diff --git a/test/test_search.py b/test/test_search.py index df3b165..7156e02 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -75,10 +75,6 @@ def test_get_ids_search(self): assert(search.found() == 4) assert(len(items) == 4) - #def _test_query_bad_url(self): - # with self.assertRaises(SatSearchError): - # Search.query(url=os.path.join(API_URL, 'collections/nosuchcollection')) - def test_search_query_operator(self): expected = {'collections': ['sentinel-s2-l1c'], 'query': {'eo:cloud_cover': {'lte': '10'}, 'data_coverage': {'gt': '80'}}} instance = Search.search(collections=['sentinel-s2-l1c'],