Merge branch 'master' into feature/partition-versioning
deepyaman authored Oct 21, 2020
2 parents cc50803 + 93058c3 commit a67c808
Showing 11 changed files with 74 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -75,6 +75,7 @@ htmlcov/
 .coverage.*
 .cache
 nosetests.xml
+test-output.xml
 coverage.xml
 *.cover
 .hypothesis/
3 changes: 2 additions & 1 deletion README.md
@@ -142,11 +142,12 @@ There is a growing community around Kedro. Have a look at the [Kedro FAQs](https
 - [MercadoLibre Argentina](https://www.mercadolibre.com.ar)
 - [Mosaic Data Science](https://www.youtube.com/watch?v=fCWGevB366g)
 - [NaranjaX](https://www.youtube.com/watch?v=_0kMmRfltEQ)
-- [Open Data Science LatAm](https://www.odsla.org/)
+- [Open Data Science LatAm](https://www.odesla.org/)
 - [Retrieva](https://tech.retrieva.jp/entry/2020/07/28/181414)
 - [Roche](https://www.roche.com/)
 - [UrbanLogiq](https://urbanlogiq.com/)
 - [XP](https://youtu.be/wgnGOVNkXqU?t=2210)
+- [Dendra Systems](https://www.dendra.io/)
 
 ## What licence do you use?
 
6 changes: 5 additions & 1 deletion RELEASE.md
@@ -13,16 +13,20 @@
 ## Major features and improvements
 
 ## Bug fixes and other changes
+* Fixed `TypeError` when converting dict inputs to a node made from a wrapped `partial` function.
 * Improved handling of non-ASCII word characters in dataset names.
   - For example, a dataset named `jalapeño` will be accessible as `DataCatalog.datasets.jalapeño` rather than `DataCatalog.datasets.jalape__o`.
 * Fixed `kedro install` for an Anaconda environment defined in `environment.yml`.
 * Fixed backwards compatibility with templates generated with older Kedro versions <0.16.5. No longer need to update `.kedro.yml` to use `kedro lint` and `kedro jupyter notebook convert`.
 * Improved documentation.
+* Fixed issue with saving a `TensorFlowModelDataset` in the HDF5 format with versioning enabled.
+* Added missing `run_result` argument in `after_pipeline_run` Hooks spec.
+* Added [kedro-starter-spaceflights](https://github.com/quantumblacklabs/kedro-starter-spaceflights/) alias for generating a project: `kedro new --starter spaceflights`.
 
 ## Breaking changes to the API
 
 ## Thanks for supporting contributions
-[Deepyaman Datta](https://github.com/deepyaman), [Bhavya Merchant](https://github.com/bnmerchant), [Lovkush Agarwal](https://github.com/Lovkush-A), [Varun Krishna S](https://github.com/vhawk19), [Sebastian Bertoli](https://github.com/sebastianbertoli)
+[Deepyaman Datta](https://github.com/deepyaman), [Bhavya Merchant](https://github.com/bnmerchant), [Lovkush Agarwal](https://github.com/Lovkush-A), [Varun Krishna S](https://github.com/vhawk19), [Sebastian Bertoli](https://github.com/sebastianbertoli), [Daniel Petti](https://github.com/djpetti)
 
 # Release 0.16.5
 
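The `jalapeño` bullet in the notes above concerns attribute access on `DataCatalog.datasets`. A minimal sketch of the described behaviour, assuming the `DataCatalog` and `MemoryDataSet` APIs from this release line:

```python
from kedro.io import DataCatalog, MemoryDataSet

# With the improved name handling, the non-ASCII dataset name survives intact
# instead of being mangled to `jalape__o`.
catalog = DataCatalog({"jalapeño": MemoryDataSet([1, 2, 3])})
print(catalog.datasets.jalapeño.load())  # [1, 2, 3]
```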
6 changes: 5 additions & 1 deletion kedro/extras/datasets/tensorflow/tensorflow_model_dataset.py
@@ -31,7 +31,7 @@
 """
 import copy
 import tempfile
-from pathlib import PurePath, PurePosixPath
+from pathlib import Path, PurePath, PurePosixPath
 from typing import Any, Dict
 
 import fsspec
@@ -151,6 +151,10 @@ def _load(self) -> tf.keras.Model:
     def _save(self, data: tf.keras.Model) -> None:
         save_path = get_filepath_str(self._get_save_path(), self._protocol)
 
+        # Make sure all intermediate directories are created.
+        save_dir = Path(save_path).parent
+        save_dir.mkdir(parents=True, exist_ok=True)
+
         with tempfile.TemporaryDirectory(prefix=self._tmp_prefix) as path:
             if self._is_h5:
                 path = str(PurePath(path) / TEMPORARY_H5_FILE)
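For context on the versioning fix above: a versioned save path gains a timestamped subdirectory that may not exist on the first save. A minimal sketch of the directory-creation idiom the patch relies on (the path below is hypothetical):

```python
from pathlib import Path

# A versioned dataset writes to <filepath>/<version>/<filename>; the version
# directory is new on every save, so create the whole chain idempotently.
save_path = Path("data/06_models/model.h5/2020-10-21T00.00.00.000Z/model.h5")
save_path.parent.mkdir(parents=True, exist_ok=True)  # no error if it exists
```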
1 change: 1 addition & 0 deletions kedro/framework/cli/cli.py
@@ -78,6 +78,7 @@
     "pandas-iris": "git+https://github.com/quantumblacklabs/kedro-starter-pandas-iris.git",
     "pyspark": "git+https://github.com/quantumblacklabs/kedro-starter-pyspark.git",
     "pyspark-iris": "git+https://github.com/quantumblacklabs/kedro-starter-pyspark-iris.git",
+    "spaceflights": "git+https://github.com/quantumblacklabs/kedro-starter-spaceflights.git",
 }
 
7 changes: 6 additions & 1 deletion kedro/framework/hooks/specs.py
@@ -190,7 +190,11 @@ def before_pipeline_run(
 
     @hook_spec
     def after_pipeline_run(
-        self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
+        self,
+        run_params: Dict[str, Any],
+        run_result: Dict[str, Any],
+        pipeline: Pipeline,
+        catalog: DataCatalog,
     ) -> None:
         """Hook to be invoked after a pipeline runs.
@@ -213,6 +217,7 @@ def after_pipeline_run(
                     "extra_params": Optional[Dict[str, Any]]
                 }
+            run_result: The output of ``Pipeline`` run.
             pipeline: The ``Pipeline`` that was run.
             catalog: The ``DataCatalog`` used during the run.
         """
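A sketch of a plugin-side implementation that picks up the new argument; the class name and the body are illustrative, but the signature mirrors the spec above:

```python
from typing import Any, Dict

from kedro.framework.hooks import hook_impl
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline


class MyHooks:
    @hook_impl
    def after_pipeline_run(
        self,
        run_params: Dict[str, Any],
        run_result: Dict[str, Any],
        pipeline: Pipeline,
        catalog: DataCatalog,
    ) -> None:
        # `run_result` carries the outputs of the `Pipeline` run.
        print("Run produced outputs:", list(run_result))
```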
2 changes: 1 addition & 1 deletion kedro/pipeline/node.py
@@ -677,7 +677,7 @@ def _dict_inputs_to_list(func: Callable[[Any], Any], inputs: Dict[str, str]):
     """Convert a dict representation of the node inputs to a list, ensuring
     the appropriate order for binding them to the node's function.
     """
-    sig = inspect.signature(func).bind(**inputs)
+    sig = inspect.signature(func, follow_wrapped=False).bind(**inputs)
     # for deterministic behavior in python 3.5, sort kwargs inputs alphabetically
     return list(sig.args) + sorted(sig.kwargs.values())
 
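Why `follow_wrapped=False` fixes the `TypeError` noted in the release notes — a standalone sketch using only the standard library. `update_wrapper` sets `__wrapped__` on the partial, and by default `inspect.signature` follows it back to the unbound function, whose signature still requires the already-bound argument:

```python
import inspect
from functools import partial, update_wrapper


def biconcat(input1, input2):
    return input1 + input2


func = update_wrapper(partial(biconcat, input1=["in1"]), biconcat)

try:
    # Follows `__wrapped__` to `biconcat`, which still requires `input1`.
    inspect.signature(func).bind(input2="in2")
except TypeError as exc:
    print(exc)  # missing a required argument: 'input1'

# The partial's own signature already has `input1` bound, so this succeeds.
bound = inspect.signature(func, follow_wrapped=False).bind(input2="in2")
print(bound.kwargs)  # {'input2': 'in2'}
```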
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 from IPython.core.magic import register_line_magic, needs_local_scope
+from kedro.framework.hooks import get_hook_manager
 
 # Find the project root (./../../../)
 startup_error = None
@@ -38,6 +39,13 @@ def reload_kedro(path, line=None):
         for module in to_remove:
             del sys.modules[module]
 
+        # clear hook manager; hook implementations will be re-registered when the
+        # context is instantiated again in `load_context()` below
+        hook_manager = get_hook_manager()
+        name_plugin_pairs = hook_manager.list_name_plugin()
+        for name, plugin in name_plugin_pairs:
+            hook_manager.unregister(name=name, plugin=plugin)
+
         logging.debug("Loading the context from %s", str(path))
         # Reload context to fix `pickle` related error (it is unable to serialize reloaded objects)
         # Some details can be found here:
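The unregister loop above exists because registering the same plugin object twice is an error. A sketch of the underlying behaviour, assuming pluggy (the library behind Kedro's hook manager):

```python
import pluggy

pm = pluggy.PluginManager("example")  # arbitrary project name


class MyHooks:
    pass


plugin = MyHooks()
pm.register(plugin)
try:
    pm.register(plugin)  # same object again
except ValueError as exc:
    print(exc)  # Plugin already registered

# Mirroring the startup script: unregister everything so that reloading the
# context can register its hook implementations from a clean slate.
for name, registered in pm.list_name_plugin():
    pm.unregister(name=name, plugin=registered)
```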
24 changes: 24 additions & 0 deletions tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py
@@ -317,6 +317,30 @@ def test_save_and_load(
         new_predictions = reloaded.predict(dummy_x_test)
         np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6)
 
+    def test_hdf5_save_format(
+        self,
+        dummy_tf_base_model,
+        dummy_x_test,
+        filepath,
+        tensorflow_model_dataset,
+        load_version,
+        save_version,
+    ):
+        """Test versioned TensorflowModelDataset can save TF graph models in
+        HDF5 format"""
+        hdf5_dataset = tensorflow_model_dataset(
+            filepath=filepath,
+            save_args={"save_format": "h5"},
+            version=Version(load_version, save_version),
+        )
+
+        predictions = dummy_tf_base_model.predict(dummy_x_test)
+        hdf5_dataset.save(dummy_tf_base_model)
+
+        reloaded = hdf5_dataset.load()
+        new_predictions = reloaded.predict(dummy_x_test)
+        np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6)
+
     def test_prevent_overwrite(self, dummy_tf_base_model, versioned_tf_model_dataset):
         """Check the error when attempting to override the data set if the
         corresponding file for a given save version already exists."""
13 changes: 11 additions & 2 deletions tests/framework/hooks/test_context_hooks.py
@@ -262,11 +262,20 @@ def before_pipeline_run(
 
     @hook_impl
     def after_pipeline_run(
-        self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
+        self,
+        run_params: Dict[str, Any],
+        run_result: Dict[str, Any],
+        pipeline: Pipeline,
+        catalog: DataCatalog,
     ) -> None:
         self.logger.info(
             "Ran pipeline",
-            extra={"pipeline": pipeline, "run_params": run_params, "catalog": catalog},
+            extra={
+                "pipeline": pipeline,
+                "run_params": run_params,
+                "run_result": run_result,
+                "catalog": catalog,
+            },
         )
 
     @hook_impl
10 changes: 10 additions & 0 deletions tests/pipeline/test_node.py
@@ -464,3 +464,13 @@ def test_updated_partial(self):
         assert str(n) == "identity([in]) -> [out]"
         assert n.name == "identity([in]) -> [out]"
         assert n.short_name == "Identity"
+
+    def test_updated_partial_dict_inputs(self):
+        n = node(
+            update_wrapper(partial(biconcat, input1=["in1"]), biconcat),
+            dict(input2="in2"),
+            ["out"],
+        )
+        assert str(n) == "biconcat([in2]) -> [out]"
+        assert n.name == "biconcat([in2]) -> [out]"
+        assert n.short_name == "Biconcat"