diff --git a/.circleci/config.yml b/.circleci/config.yml index e52f02c..f6768dd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -25,31 +25,6 @@ references: jobs: - build_and_test_27: - docker: - - image: circleci/python:2.7.13 - steps: - - *restore_repo - - checkout - - *save_repo - - restore_cache: - keys: - - v1-dependencies27-{{ checksum "requirements.txt"}} - - v1-dependencies27 - - run: | - pip install virtualenv - virtualenv ~/venv27 - . ~/venv27/bin/activate - pip install -r requirements.txt - pip install -r requirements-dev.txt - pip install . - cd test - pytest -v --cov satstac --cov-report term-missing - - save_cache: - key: v1-dependencies27-{{ checksum "requirements.txt"}} - paths: - - ~/venv27 - build_and_test_35: docker: - image: circleci/python:3.5 @@ -149,9 +124,6 @@ jobs: workflows: version: 2 - build_test_27: - jobs: - - build_and_test_27 build_test_35: jobs: - build_and_test_35 diff --git a/CHANGELOG.md b/CHANGELOG.md index 29b4a8e..37da3eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,10 +6,23 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [v0.1.1] - 2019-01-15 + +### Added + +- When adding items to a catalog the parent catalog of the item is now cached. This can greatly improve ingest speed when ingesting multiple items under the same parent, especially if the catalog is a remote catalog (i.e., updating catalog on an s3 bucket). + +### Changed + +- Python 3 only. With Python 2.7 going unsupported in 2020 the time has come to stop supporting 2.7. There are too many additions in Python3 that continue to make backward compatibility with Python 2.7 more difficult. In the case of this release the addition of caching using `functools` made sat-stac incompatible with Python 2.7. +- More lenient version requirements for `requests` (now >=2.19.1). Otherwise can cause dependency incompatibility problems in some cases. 
+- Behavior of `path` and `filename` keyword arguments to Collection.add_item() has changed slightly. The components of `path` are now exclusively used to generate sub-catalogs, while `filename` is the relative filename (which could include a further subdirectory) from the last sub-catalog (its parent). Before, it was assumed that Item files were always in a single subdirectory under their parent catalog. +- Tutorials updated ## [v0.1.0] - 2019-01-13 Initial Release [Unreleased]: https://github.com/sat-utils/sat-stac/compare/master...develop +[v0.1.1]: https://github.com/sat-utils/sat-stac/compare/0.1.0...v0.1.1 [v0.1.0]: https://github.com/sat-utils/sat-stac/tree/0.1.0 \ No newline at end of file diff --git a/README.md b/README.md index 16b8115..0bd9bed 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![CircleCI](https://circleci.com/gh/sat-utils/sat-stac.svg?style=svg&circle-token=ef97f3eea6cf901646fc2951e5a941686456b0da)](https://circleci.com/gh/sat-utils/sat-stac) [![PyPI version](https://badge.fury.io/py/sat-stac.svg)](https://badge.fury.io/py/sat-stac) [![codecov](https://codecov.io/gh/sat-utils/sat-stac/branch/master/graph/badge.svg)](https://codecov.io/gh/sat-utils/sat-stac) -This is a Python library for working with [Spatio-Temporal Asset Catalogs (STAC)](https://github.com/radiantearth/stac-spec). It can be used to +This is a Python 3 library for working with [Spatio-Temporal Asset Catalogs (STAC)](https://github.com/radiantearth/stac-spec). It can be used to - Open and update existing catalogs - Traverse through catalogs @@ -30,7 +30,7 @@ $ pip install . #### Versions -The initial sat-stac version is 0.1.0, which uses the STAC spec v0.6.0. To install other versions of sat-stac, install the matching version of sat-stac. +The latest version of sat-stac is 0.1.1, which uses the STAC spec v0.6.0. To work with a different version of the STAC spec, install the matching version of sat-stac. 
```bash pip install satstac==0.1.0 @@ -40,7 +40,7 @@ The table below shows the corresponding versions between sat-stac and STAC: | sat-stac | STAC | | -------- | ---- | -| 0.1.0 | 0.6.0 | +| 0.1.x | 0.6.0 | ## Tutorials diff --git a/requirements.txt b/requirements.txt index c34a301..5f6be43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -requests~=2.19.1 +requests>=2.19.1 python-dateutil~=2.7.5 diff --git a/satstac/collection.py b/satstac/collection.py index 5afefe8..452c237 100644 --- a/satstac/collection.py +++ b/satstac/collection.py @@ -1,4 +1,5 @@ import logging +import functools import os from datetime import datetime @@ -48,21 +49,13 @@ def properties(self): """ Get dictionary of properties """ return self.data.get('properties', {}) - def add_item(self, item, path='', filename='${id}'): - """ Add an item to this collection """ - start = datetime.now() - if self.filename is None: - raise STACError('Save catalog before adding items') - item_link = item.get_filename(path, filename) - item_fname = os.path.join(self.path, item_link) - item_path = os.path.dirname(item_fname) - root_link = self.links('root')[0] - root_path = os.path.dirname(root_link) - + @functools.lru_cache() + def parent_catalog(self, path): + """ Given path to a new Item find parent catalog """ cat = self - dirs = utils.splitall(item_link) - var_names = [v.strip('$').strip('{}') for v in utils.splitall(path)] - for i, d in enumerate(dirs[:-2]): + dirs = utils.splitall(path) + var_names = [v.strip('$').strip('{}') for v in dirs] + for i, d in enumerate(dirs): fname = os.path.join(os.path.join(cat.path, d), 'catalog.json') # open existing sub-catalog or create new one try: @@ -74,16 +67,30 @@ def add_item(self, item, path='', filename='${id}'): # add the sub-catalog to this catalog cat.add_catalog(subcat) cat = subcat + return cat.filename + + def add_item(self, item, path='', filename='${id}'): + """ Add an item to this collection """ + start = datetime.now() + if 
self.filename is None: + raise STACError('Save catalog before adding items') + item_link = item.get_filename(path, filename) + item_fname = os.path.join(self.path, item_link) + item_path = os.path.dirname(item_fname) + root_link = self.links('root')[0] + root_path = os.path.dirname(root_link) + + parent = Catalog.open(self.parent_catalog(item.substitute(path))) # create link to item - cat.add_link('item', os.path.relpath(item_fname, cat.path)) - cat.save() + parent.add_link('item', os.path.relpath(item_fname, parent.path)) + parent.save() # create links from item item.clean_hierarchy() item.add_link('self', os.path.join(self.endpoint(), os.path.relpath(item_fname, root_path))) item.add_link('root', os.path.relpath(root_link, item_path)) - item.add_link('parent', os.path.relpath(cat.filename, item_path)) + item.add_link('parent', os.path.relpath(parent.filename, item_path)) # this assumes the item has been added to a Collection, not a Catalog item.add_link('collection', os.path.relpath(self.filename, item_path)) diff --git a/satstac/version.py b/satstac/version.py index b794fd4..df9144c 100644 --- a/satstac/version.py +++ b/satstac/version.py @@ -1 +1 @@ -__version__ = '0.1.0' +__version__ = '0.1.1' diff --git a/tutorial-1.ipynb b/tutorial-1.ipynb index fcaedfb..1b88d6c 100644 --- a/tutorial-1.ipynb +++ b/tutorial-1.ipynb @@ -98,7 +98,7 @@ ], "source": [ "# get first (and only in this case) sub-catalog\n", - "subcat = cat.children()[0]\n", + "subcat = [c for c in cat.children()][0]\n", "\n", "# print some IDs\n", "print(\"Root Catalog: \", cat.id)\n", @@ -127,9 +127,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "\n", - "\n" + "\n", + "\n", + "\n" ] } ], @@ -416,7 +416,9 @@ "In addition to the item's properties, there are two additional fields that may be used in the patterns:\n", "\n", "- id: The id of the item\n", - "- date: The datetime property with the time portion stripped off" + "- date: The datetime property with the time portion 
stripped off\n", + "\n", + "The `path` provided indicates the sub-catalogs that will be used, while the `filename` provided indicates the relative filename of the Item to its parent catalog. In this example `path` is `${landsat:path}/${landsat:row}` which means sub-catalogs are created for each Landsat 'path' which contains catalogs for each Landsat 'row'. Each Landsat 'row' catalog in turn contains `item` links with the name `${date}/${id}`.json." ] }, { @@ -441,8 +443,8 @@ ], "source": [ "# save \n", - "path = '${landsat:path}/${landsat:row}/${date}'\n", - "filename = '${id}'\n", + "path = '${landsat:path}/${landsat:row}'\n", + "filename = '${date}/${id}'\n", "\n", "collection.add_item(item, path=path, filename=filename)\n", "print('Item filename: ', item.filename)\n", diff --git a/tutorial-2.ipynb b/tutorial-2.ipynb index 57ad15c..a3db153 100644 --- a/tutorial-2.ipynb +++ b/tutorial-2.ipynb @@ -391,7 +391,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "By default the files are saved in the current directory. When downloading a lot of files this is hardly desired, so sat-stac provides a way to customize the path and filename assets are saved to. By default the filename is -., as shown above but custom patterns can be provided. Tutorial-1 discussed Views, which use custom path and filename patterns when creating Catalogs, and this works much the same way." + "By default the files are saved in the current directory. When downloading a lot of files this is hardly desired, so sat-stac provides a way to customize the path and filename assets are saved to. By default the filename is `-.`, as shown above but custom patterns can be provided. Tutorial-1 discussed Views, which use custom path and filename patterns when creating Catalogs, and this works much the same way." ] }, {