From 6dd98c60d321c1ea606e6f5b65093e0cfad5d4ed Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Sun, 13 Jan 2019 22:17:49 -0500 Subject: [PATCH 01/12] add cache when adding item to catalog --- satstac/collection.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/satstac/collection.py b/satstac/collection.py index 5afefe8..f980ba4 100644 --- a/satstac/collection.py +++ b/satstac/collection.py @@ -1,4 +1,5 @@ import logging +import functools import os from datetime import datetime @@ -48,6 +49,27 @@ def properties(self): """ Get dictionary of properties """ return self.data.get('properties', {}) + @functools.lru_cache() + def parent_catalog(self, item_link): + """ Given path to a new Item find parent catalog """ + cat = self + dirs = utils.splitall(item_link) + path = os.path.dirname(item_link) + var_names = [v.strip('$').strip('{}') for v in utils.splitall(path)] + for i, d in enumerate(dirs[:-2]): + fname = os.path.join(os.path.join(cat.path, d), 'catalog.json') + # open existing sub-catalog or create new one + try: + subcat = Catalog.open(fname) + except STACError as err: + # create a new sub-catalog + subcat = self.create(id=d, description='%s catalog' % var_names[i]) + subcat.save_as(fname) + # add the sub-catalog to this catalog + cat.add_catalog(subcat) + cat = subcat + return cat.filename + def add_item(self, item, path='', filename='${id}'): """ Add an item to this collection """ start = datetime.now() @@ -58,7 +80,9 @@ def add_item(self, item, path='', filename='${id}'): item_path = os.path.dirname(item_fname) root_link = self.links('root')[0] root_path = os.path.dirname(root_link) + parent = Catalog.open(self.parent_catalog(item_link)) + ''' cat = self dirs = utils.splitall(item_link) var_names = [v.strip('$').strip('{}') for v in utils.splitall(path)] @@ -74,16 +98,17 @@ def add_item(self, item, path='', filename='${id}'): # add the sub-catalog to this catalog cat.add_catalog(subcat) cat = subcat + ''' 
# create link to item - cat.add_link('item', os.path.relpath(item_fname, cat.path)) - cat.save() + parent.add_link('item', os.path.relpath(item_fname, parent.path)) + parent.save() # create links from item item.clean_hierarchy() item.add_link('self', os.path.join(self.endpoint(), os.path.relpath(item_fname, root_path))) item.add_link('root', os.path.relpath(root_link, item_path)) - item.add_link('parent', os.path.relpath(cat.filename, item_path)) + item.add_link('parent', os.path.relpath(parent.filename, item_path)) # this assumes the item has been added to a Collection, not a Catalog item.add_link('collection', os.path.relpath(self.filename, item_path)) From a4fc20e9ccd93e3b6601eccec8b519395832f6b2 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Mon, 14 Jan 2019 05:53:16 +0000 Subject: [PATCH 02/12] caching debug --- satstac/collection.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/satstac/collection.py b/satstac/collection.py index f980ba4..39f587e 100644 --- a/satstac/collection.py +++ b/satstac/collection.py @@ -50,13 +50,14 @@ def properties(self): return self.data.get('properties', {}) @functools.lru_cache() - def parent_catalog(self, item_link): + def parent_catalog(self, path): """ Given path to a new Item find parent catalog """ + print(path) cat = self - dirs = utils.splitall(item_link) - path = os.path.dirname(item_link) - var_names = [v.strip('$').strip('{}') for v in utils.splitall(path)] - for i, d in enumerate(dirs[:-2]): + dirs = utils.splitall(path) + print(dirs) + var_names = [v.strip('$').strip('{}') for v in dirs] + for i, d in enumerate(dirs[:-1]): fname = os.path.join(os.path.join(cat.path, d), 'catalog.json') # open existing sub-catalog or create new one try: @@ -80,7 +81,7 @@ def add_item(self, item, path='', filename='${id}'): item_path = os.path.dirname(item_fname) root_link = self.links('root')[0] root_path = os.path.dirname(root_link) - parent = Catalog.open(self.parent_catalog(item_link)) + 
parent = Catalog.open(self.parent_catalog(path)) ''' cat = self @@ -102,6 +103,7 @@ def add_item(self, item, path='', filename='${id}'): # create link to item parent.add_link('item', os.path.relpath(item_fname, parent.path)) + print('parent', parent.filename) parent.save() # create links from item From 58ba8ae4d9b2f6ea0f727db8f9de2347217421b7 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Mon, 14 Jan 2019 00:54:21 -0500 Subject: [PATCH 03/12] change version --- satstac/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satstac/version.py b/satstac/version.py index b794fd4..476bae1 100644 --- a/satstac/version.py +++ b/satstac/version.py @@ -1 +1 @@ -__version__ = '0.1.0' +__version__ = '0.1.1b1' From a41987a4ba0eb906a2a7d373c6891d7d621443b6 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Mon, 14 Jan 2019 21:28:16 -0500 Subject: [PATCH 04/12] update add items --- satstac/collection.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/satstac/collection.py b/satstac/collection.py index 39f587e..0cc70c6 100644 --- a/satstac/collection.py +++ b/satstac/collection.py @@ -52,12 +52,10 @@ def properties(self): @functools.lru_cache() def parent_catalog(self, path): """ Given path to a new Item find parent catalog """ - print(path) cat = self dirs = utils.splitall(path) - print(dirs) var_names = [v.strip('$').strip('{}') for v in dirs] - for i, d in enumerate(dirs[:-1]): + for i, d in enumerate(dirs): fname = os.path.join(os.path.join(cat.path, d), 'catalog.json') # open existing sub-catalog or create new one try: @@ -81,29 +79,11 @@ def add_item(self, item, path='', filename='${id}'): item_path = os.path.dirname(item_fname) root_link = self.links('root')[0] root_path = os.path.dirname(root_link) - parent = Catalog.open(self.parent_catalog(path)) - ''' - cat = self - dirs = utils.splitall(item_link) - var_names = [v.strip('$').strip('{}') for v in utils.splitall(path)] - for i, d in 
enumerate(dirs[:-2]): - fname = os.path.join(os.path.join(cat.path, d), 'catalog.json') - # open existing sub-catalog or create new one - try: - subcat = Catalog.open(fname) - except STACError as err: - # create a new sub-catalog - subcat = self.create(id=d, description='%s catalog' % var_names[i]) - subcat.save_as(fname) - # add the sub-catalog to this catalog - cat.add_catalog(subcat) - cat = subcat - ''' + parent = Catalog.open(self.parent_catalog(item.substitute(path)))Z # create link to item parent.add_link('item', os.path.relpath(item_fname, parent.path)) - print('parent', parent.filename) parent.save() # create links from item From e1bff81400a8cce8bc9d3b6051308e44fd43a141 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Mon, 14 Jan 2019 21:30:44 -0500 Subject: [PATCH 05/12] fix typo --- satstac/collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satstac/collection.py b/satstac/collection.py index 0cc70c6..452c237 100644 --- a/satstac/collection.py +++ b/satstac/collection.py @@ -80,7 +80,7 @@ def add_item(self, item, path='', filename='${id}'): root_link = self.links('root')[0] root_path = os.path.dirname(root_link) - parent = Catalog.open(self.parent_catalog(item.substitute(path)))Z + parent = Catalog.open(self.parent_catalog(item.substitute(path))) # create link to item parent.add_link('item', os.path.relpath(item_fname, parent.path)) From cd1720b252d404201cfc69f2b710a458cc5a8b2c Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 15 Jan 2019 14:07:48 -0500 Subject: [PATCH 06/12] bump requests version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c34a301..a9e2ebf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -requests~=2.19.1 +requests~=2.21.0 python-dateutil~=2.7.5 From 2be1e07b27d9ae2fb87b86be386cfb8644fb048a Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 15 Jan 2019 17:18:30 -0500 Subject: [PATCH 07/12] 
allow requests to be >= 2.19.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a9e2ebf..5f6be43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -requests~=2.21.0 +requests>=2.19.1 python-dateutil~=2.7.5 From eac50f496795fad5d496ad88022da1ef7327b97e Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 15 Jan 2019 17:57:19 -0500 Subject: [PATCH 08/12] update changelog --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29b4a8e..e1099a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [v0.1.1] - 2019-01-15 + +### Added + +- When adding items to a catalog the parent catalog of the item is now cached. This can greatly improve ingest speed when ingesting multiple items under the same parent, especially if the catalog is a remote catalog (i.e., updating catalog on an s3 bucket). + +### Changed + +- More lenient version requirements for `requests` (now >=2.19.1). Otherwise can cause dependency incompatibility problems in some cases. +- Behavior of `path` and `filename` keyword arguments to Collection.add_item() has changed slightly. The components of `path` are now exclusively used to generate sub-catalogs, while `filename` is the relative filename (which could include a further subdirectory) from the last sub-catalog (its parent). Before, it was assumed that Item files were always in a single subdirectory under its parent catalog. 
+- Tutorials updated ## [v0.1.0] - 2019-01-13 Initial Release [Unreleased]: https://github.com/sat-utils/sat-stac/compare/master...develop +[v0.1.1]: https://github.com/sat-utils/sat-stac/compare/0.1.0...v0.1.1 [v0.1.0]: https://github.com/sat-utils/sat-stac/tree/0.1.0 \ No newline at end of file From 618012d0f13f5f7872fd1ef35e28c357bb07a7f0 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 15 Jan 2019 17:57:25 -0500 Subject: [PATCH 09/12] update tutorials --- tutorial-1.ipynb | 16 +++++++++------- tutorial-2.ipynb | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tutorial-1.ipynb b/tutorial-1.ipynb index fcaedfb..1b88d6c 100644 --- a/tutorial-1.ipynb +++ b/tutorial-1.ipynb @@ -98,7 +98,7 @@ ], "source": [ "# get first (and only in this case) sub-catalog\n", - "subcat = cat.children()[0]\n", + "subcat = [c for c in cat.children()][0]\n", "\n", "# print some IDs\n", "print(\"Root Catalog: \", cat.id)\n", @@ -127,9 +127,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\n", - "\n" + "\n", + "\n", + "\n" ] } ], @@ -416,7 +416,9 @@ "In addition to the item's properties, there are two additional fields that may be used in the patterns:\n", "\n", "- id: The id of the item\n", - "- date: The datetime property with the time portion stripped off" + "- date: The datetime property with the time portion stripped off\n", + "\n", + "The `path` provided indicates the sub-catalogs that will be used, while the `filename` provided indicates the relative filename of the Item to its parent catalog. In this example `path` is `${landsat:path}/${landsat:row}` which means sub-catalogs are created for each Landsat 'path' which contains catalogs for each Landsat 'row'. Each Landsat 'row' catalog in turn contains `item` links with the name `${date}/${id}`.json." 
] }, { @@ -441,8 +443,8 @@ ], "source": [ "# save \n", - "path = '${landsat:path}/${landsat:row}/${date}'\n", - "filename = '${id}'\n", + "path = '${landsat:path}/${landsat:row}'\n", + "filename = '${date}/${id}'\n", "\n", "collection.add_item(item, path=path, filename=filename)\n", "print('Item filename: ', item.filename)\n", diff --git a/tutorial-2.ipynb b/tutorial-2.ipynb index 57ad15c..a3db153 100644 --- a/tutorial-2.ipynb +++ b/tutorial-2.ipynb @@ -391,7 +391,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "By default the files are saved in the current directory. When downloading a lot of files this is hardly desired, so sat-stac provides a way to customize the path and filename assets are saved to. By default the filename is -., as shown above but custom patterns can be provided. Tutorial-1 discussed Views, which use custom path and filename patterns when creating Catalogs, and this works much the same way." + "By default the files are saved in the current directory. When downloading a lot of files this is hardly desired, so sat-stac provides a way to customize the path and filename assets are saved to. By default the filename is `-.`, as shown above but custom patterns can be provided. Tutorial-1 discussed Views, which use custom path and filename patterns when creating Catalogs, and this works much the same way." 
] }, { From 7dc660c1468b9c968e78357b1d3ce432f5bc73be Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 15 Jan 2019 17:57:38 -0500 Subject: [PATCH 10/12] bump version --- satstac/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/satstac/version.py b/satstac/version.py index 476bae1..df9144c 100644 --- a/satstac/version.py +++ b/satstac/version.py @@ -1 +1 @@ -__version__ = '0.1.1b1' +__version__ = '0.1.1' From 82d850b1ccdd4b0f1e327172125bafea5de61415 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 15 Jan 2019 18:25:01 -0500 Subject: [PATCH 11/12] update changelog and readme with notes about Python 2.7 (now unsupported) --- CHANGELOG.md | 1 + README.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1099a2..37da3eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed +- Python 3 only. With Python 2.7 going unsupported in 2020 the time has come to stop supporting 2.7. There are too many additions in Python3 that continue to make backward compatibility with Python 2.7 more difficult. In the case of this release the addition of caching using `functools` made sat-stac incompatible with Python 2.7. - More lenient version requirements for `requests` (now >=2.19.1). Otherwise can cause dependency incompatibility problems in some cases. - Behavior of `path` and `filename` keyword arguments to Collection.add_item() has changed slightly. The components of `path` are now exclusively used to generate sub-catalogs, while `filename` is the relative filename (which could include a further subdirectory) from the last sub-catalog (its parent). Before, it was assumed that Item files were always in a single subdirectory under its parent catalog. 
- Tutorials updated diff --git a/README.md b/README.md index 16b8115..0bd9bed 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![CircleCI](https://circleci.com/gh/sat-utils/sat-stac.svg?style=svg&circle-token=ef97f3eea6cf901646fc2951e5a941686456b0da)](https://circleci.com/gh/sat-utils/sat-stac) [![PyPI version](https://badge.fury.io/py/sat-stac.svg)](https://badge.fury.io/py/sat-stac) [![codecov](https://codecov.io/gh/sat-utils/sat-stac/branch/master/graph/badge.svg)](https://codecov.io/gh/sat-utils/sat-stac) -This is a Python library for working with [Spatio-Temporal Asset Catalogs (STAC)](https://github.com/radiantearth/stac-spec). It can be used to +This is a Python 3 library for working with [Spatio-Temporal Asset Catalogs (STAC)](https://github.com/radiantearth/stac-spec). It can be used to - Open and update existing catalogs - Traverse through catalogs @@ -30,7 +30,7 @@ $ pip install . #### Versions -The initial sat-stac version is 0.1.0, which uses the STAC spec v0.6.0. To install other versions of sat-stac, install the matching version of sat-stac. +The latest version of sat-stac is 0.1.1, which uses the STAC spec v0.6.0. To install other versions of sat-stac, install the matching version of sat-stac. 
```bash pip install satstac==0.1.0 @@ -40,7 +40,7 @@ The table below shows the corresponding versions between sat-stac and STAC: | sat-stac | STAC | | -------- | ---- | -| 0.1.0 | 0.6.0 | +| 0.1.x | 0.6.0 | ## Tutorials From cee34df21a1e4995885dba8020b5b7e1cf5fa118 Mon Sep 17 00:00:00 2001 From: Matthew Hanson Date: Tue, 15 Jan 2019 18:25:12 -0500 Subject: [PATCH 12/12] ci: remove tests for Python 2.7 --- .circleci/config.yml | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index e52f02c..f6768dd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -25,31 +25,6 @@ references: jobs: - build_and_test_27: - docker: - - image: circleci/python:2.7.13 - steps: - - *restore_repo - - checkout - - *save_repo - - restore_cache: - keys: - - v1-dependencies27-{{ checksum "requirements.txt"}} - - v1-dependencies27 - - run: | - pip install virtualenv - virtualenv ~/venv27 - . ~/venv27/bin/activate - pip install -r requirements.txt - pip install -r requirements-dev.txt - pip install . - cd test - pytest -v --cov satstac --cov-report term-missing - - save_cache: - key: v1-dependencies27-{{ checksum "requirements.txt"}} - paths: - - ~/venv27 - build_and_test_35: docker: - image: circleci/python:3.5 @@ -149,9 +124,6 @@ jobs: workflows: version: 2 - build_test_27: - jobs: - - build_and_test_27 build_test_35: jobs: - build_and_test_35