From 62a3d0016f5d28dfbbbfd476528f6eeab26c9036 Mon Sep 17 00:00:00 2001 From: Waylon Walker Date: Wed, 14 Oct 2020 05:45:36 -0500 Subject: [PATCH 1/9] added test-output.xml to .gitignore (#558) Co-authored-by: WaylonWalker --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ae60080e7c..1abd52c0d6 100644 --- a/.gitignore +++ b/.gitignore @@ -75,6 +75,7 @@ htmlcov/ .coverage.* .cache nosetests.xml +test-output.xml coverage.xml *.cover .hypothesis/ From cd2687ef9abe761e62d44295f534d4cd5122d23c Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Mon, 19 Oct 2020 11:38:05 -0400 Subject: [PATCH 2/9] Add minimal, black-compatible flake8 configuration (#524) --- kedro/framework/cli/project.py | 6 ++---- .../project/{{ cookiecutter.repo_name }}/setup.cfg | 4 ++++ tests/framework/cli/test_project.py | 14 ++++---------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/kedro/framework/cli/project.py b/kedro/framework/cli/project.py index 953d6861bb..3ff30bd16a 100644 --- a/kedro/framework/cli/project.py +++ b/kedro/framework/cli/project.py @@ -125,12 +125,10 @@ def lint(files, check_only): ) from exc python_call("black", ("--check",) + files if check_only else files) - python_call("flake8", ("--max-line-length=88",) + files) + python_call("flake8", files) check_flag = ("-c",) if check_only else () - python_call( - "isort", (*check_flag, "-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") + files - ) + python_call("isort", (*check_flag, "-rc") + files) @project_group.command() diff --git a/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg b/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg index d24c0e98c4..3226e16552 100644 --- a/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg +++ b/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg @@ -1,3 +1,7 @@ +[flake8] +max-line-length=88 +extend-ignore=E203 + [isort] multi_line_output=3 include_trailing_comma=True diff --git a/tests/framework/cli/test_project.py b/tests/framework/cli/test_project.py index ca92a0a38d..5a113b0cfe 100644 --- a/tests/framework/cli/test_project.py +++ b/tests/framework/cli/test_project.py @@ -165,11 +165,8 @@ def test_lint( ) expected_calls = [ mocker.call("black", expected_files), - mocker.call("flake8", ("--max-line-length=88",) + expected_files), - mocker.call( - "isort", - ("-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") + expected_files, - ), + mocker.call("flake8", expected_files), + mocker.call("isort", ("-rc",) + expected_files), ] assert python_call_mock.call_args_list == expected_calls @@ -201,11 +198,8 @@ def test_lint_check_only( ) expected_calls = [ mocker.call("black", ("--check",) + expected_files), - mocker.call("flake8", ("--max-line-length=88",) + expected_files), - mocker.call( - "isort", - ("-c", "-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") + expected_files, - ), + mocker.call("flake8", expected_files), + mocker.call("isort", ("-c", "-rc") + expected_files), ] assert python_call_mock.call_args_list == expected_calls From 4aa83377a4cc2d67b427d2790c0f0424dc1f7463 Mon Sep 17 00:00:00 2001 From: Daniel Petti Date: Mon, 19 Oct 2020 15:05:25 -0400 Subject: [PATCH 3/9] [KED-2140] Fix issue with saving versioned HDF5 models. (#519) --- RELEASE.md | 3 ++- .../tensorflow/tensorflow_model_dataset.py | 6 ++++- .../test_tensorflow_model_dataset.py | 24 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index bb868bbb5b..30676386ed 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -18,11 +18,12 @@ * Fixed `kedro install` for an Anaconda environment defined in `environment.yml`. * Fixed backwards compatibility with templates generated with older Kedro versions <0.16.5. No longer need to update `.kedro.yml` to use `kedro lint` and `kedro jupyter notebook convert`. * Improved documentation. +* Fixed issue with saving a `TensorFlowModelDataset` in the HDF5 format with versioning enabled. ## Breaking changes to the API ## Thanks for supporting contributions -[Deepyaman Datta](https://github.com/deepyaman), [Bhavya Merchant](https://github.com/bnmerchant), [Lovkush Agarwal](https://github.com/Lovkush-A), [Varun Krishna S](https://github.com/vhawk19), [Sebastian Bertoli](https://github.com/sebastianbertoli) +[Deepyaman Datta](https://github.com/deepyaman), [Bhavya Merchant](https://github.com/bnmerchant), [Lovkush Agarwal](https://github.com/Lovkush-A), [Varun Krishna S](https://github.com/vhawk19), [Sebastian Bertoli](https://github.com/sebastianbertoli), [Daniel Petti](https://github.com/djpetti) # Release 0.16.5 diff --git a/kedro/extras/datasets/tensorflow/tensorflow_model_dataset.py b/kedro/extras/datasets/tensorflow/tensorflow_model_dataset.py index 109f686169..d5db7fed12 100644 --- a/kedro/extras/datasets/tensorflow/tensorflow_model_dataset.py +++ b/kedro/extras/datasets/tensorflow/tensorflow_model_dataset.py @@ -31,7 +31,7 @@ """ import copy import tempfile -from pathlib import PurePath, PurePosixPath +from pathlib import Path, PurePath, PurePosixPath from typing import Any, Dict import fsspec @@ -151,6 +151,10 @@ def _load(self) -> tf.keras.Model: def _save(self, data: tf.keras.Model) -> None: save_path = get_filepath_str(self._get_save_path(), self._protocol) + # Make sure all intermediate directories are created. + save_dir = Path(save_path).parent + save_dir.mkdir(parents=True, exist_ok=True) + with tempfile.TemporaryDirectory(prefix=self._tmp_prefix) as path: if self._is_h5: path = str(PurePath(path) / TEMPORARY_H5_FILE) diff --git a/tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py b/tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py index 09ad58cd70..a3f87d1399 100644 --- a/tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py +++ b/tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py @@ -317,6 +317,30 @@ def test_save_and_load( new_predictions = reloaded.predict(dummy_x_test) np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6) + def test_hdf5_save_format( + self, + dummy_tf_base_model, + dummy_x_test, + filepath, + tensorflow_model_dataset, + load_version, + save_version, + ): + """Test versioned TensorflowModelDataset can save TF graph models in + HDF5 format""" + hdf5_dataset = tensorflow_model_dataset( + filepath=filepath, + save_args={"save_format": "h5"}, + version=Version(load_version, save_version), + ) + + predictions = dummy_tf_base_model.predict(dummy_x_test) + hdf5_dataset.save(dummy_tf_base_model) + + reloaded = hdf5_dataset.load() + new_predictions = reloaded.predict(dummy_x_test) + np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6) + def test_prevent_overwrite(self, dummy_tf_base_model, versioned_tf_model_dataset): """Check the error when attempting to override the data set if the corresponding file for a given save version already exists.""" From e0b96145fdf0531c628aaf9ea8892fd61696482d Mon Sep 17 00:00:00 2001 From: Lais Carvalho Date: Tue, 20 Oct 2020 10:30:56 +0100 Subject: [PATCH 4/9] Add company and correct link (#826) --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7bc44f7cf5..be9b1c7b9c 100644 --- a/README.md +++ b/README.md @@ -142,11 +142,12 @@ There is a growing community around Kedro. Have a look at the [Kedro FAQs](https - [MercadoLibre Argentina](https://www.mercadolibre.com.ar) - [Mosaic Data Science](https://www.youtube.com/watch?v=fCWGevB366g) - [NaranjaX](https://www.youtube.com/watch?v=_0kMmRfltEQ) -- [Open Data Science LatAm](https://www.odsla.org/) +- [Open Data Science LatAm](https://www.odesla.org/) - [Retrieva](https://tech.retrieva.jp/entry/2020/07/28/181414) - [Roche](https://www.roche.com/) - [UrbanLogiq](https://urbanlogiq.com/) - [XP](https://youtu.be/wgnGOVNkXqU?t=2210) +- [Dendra Systems](https://www.dendra.io/) ## What licence do you use? From f5b09917a9301469d452de69da105b2941e8d18f Mon Sep 17 00:00:00 2001 From: "Kiyohito Kunii (Kiyo)" <8097799+921kiyo@users.noreply.github.com> Date: Tue, 20 Oct 2020 11:53:27 +0100 Subject: [PATCH 5/9] Add missing `run_result` argument in `after_pipeline_run` Hooks spec (#830) --- RELEASE.md | 1 + kedro/framework/hooks/specs.py | 7 ++++++- tests/framework/hooks/test_context_hooks.py | 13 +++++++++++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 30676386ed..68a53b4018 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -19,6 +19,7 @@ * Fixed backwards compatibility with templates generated with older Kedro versions <0.16.5. No longer need to update `.kedro.yml` to use `kedro lint` and `kedro jupyter notebook convert`. * Improved documentation. * Fixed issue with saving a `TensorFlowModelDataset` in the HDF5 format with versioning enabled. +* Added missing `run_result` argument in `after_pipeline_run` Hooks spec. ## Breaking changes to the API diff --git a/kedro/framework/hooks/specs.py b/kedro/framework/hooks/specs.py index d49b8fee77..67679db0cd 100644 --- a/kedro/framework/hooks/specs.py +++ b/kedro/framework/hooks/specs.py @@ -190,7 +190,11 @@ def before_pipeline_run( @hook_spec def after_pipeline_run( - self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog + self, + run_params: Dict[str, Any], + run_result: Dict[str, Any], + pipeline: Pipeline, + catalog: DataCatalog, ) -> None: """Hook to be invoked after a pipeline runs. @@ -213,6 +217,7 @@ def after_pipeline_run( "extra_params": Optional[Dict[str, Any]] } + run_result: The output of ``Pipeline`` run. pipeline: The ``Pipeline`` that was run. catalog: The ``DataCatalog`` used during the run. """ diff --git a/tests/framework/hooks/test_context_hooks.py b/tests/framework/hooks/test_context_hooks.py index b6b68abfe9..6d2c2ade2a 100644 --- a/tests/framework/hooks/test_context_hooks.py +++ b/tests/framework/hooks/test_context_hooks.py @@ -262,11 +262,20 @@ def before_pipeline_run( @hook_impl def after_pipeline_run( - self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog + self, + run_params: Dict[str, Any], + run_result: Dict[str, Any], + pipeline: Pipeline, + catalog: DataCatalog, ) -> None: self.logger.info( "Ran pipeline", - extra={"pipeline": pipeline, "run_params": run_params, "catalog": catalog}, + extra={ + "pipeline": pipeline, + "run_params": run_params, + "run_result": run_result, + "catalog": catalog, + }, ) @hook_impl From 8942306a549367778e8eea7ca0814e884a4bf5d5 Mon Sep 17 00:00:00 2001 From: Dmitrii Deriabin <44967953+DmitriiDeriabinQB@users.noreply.github.com> Date: Tue, 20 Oct 2020 12:42:58 +0100 Subject: [PATCH 6/9] Revert "Add minimal, black-compatible flake8 configuration (#524)" (#573) --- kedro/framework/cli/project.py | 6 ++++-- .../project/{{ cookiecutter.repo_name }}/setup.cfg | 4 ---- tests/framework/cli/test_project.py | 14 ++++++++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/kedro/framework/cli/project.py b/kedro/framework/cli/project.py index 3ff30bd16a..953d6861bb 100644 --- a/kedro/framework/cli/project.py +++ b/kedro/framework/cli/project.py @@ -125,10 +125,12 @@ def lint(files, check_only): ) from exc python_call("black", ("--check",) + files if check_only else files) - python_call("flake8", files) + python_call("flake8", ("--max-line-length=88",) + files) check_flag = ("-c",) if check_only else () - python_call("isort", (*check_flag, "-rc") + files) + python_call( + "isort", (*check_flag, "-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") + files + ) @project_group.command() diff --git a/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg b/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg index 3226e16552..d24c0e98c4 100644 --- a/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg +++ b/kedro/templates/project/{{ cookiecutter.repo_name }}/setup.cfg @@ -1,7 +1,3 @@ -[flake8] -max-line-length=88 -extend-ignore=E203 - [isort] multi_line_output=3 include_trailing_comma=True diff --git a/tests/framework/cli/test_project.py b/tests/framework/cli/test_project.py index 5a113b0cfe..ca92a0a38d 100644 --- a/tests/framework/cli/test_project.py +++ b/tests/framework/cli/test_project.py @@ -165,8 +165,11 @@ def test_lint( ) expected_calls = [ mocker.call("black", expected_files), - mocker.call("flake8", expected_files), - mocker.call("isort", ("-rc",) + expected_files), + mocker.call("flake8", ("--max-line-length=88",) + expected_files), + mocker.call( + "isort", + ("-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") + expected_files, + ), ] assert python_call_mock.call_args_list == expected_calls @@ -198,8 +201,11 @@ def test_lint_check_only( ) expected_calls = [ mocker.call("black", ("--check",) + expected_files), - mocker.call("flake8", expected_files), - mocker.call("isort", ("-c", "-rc") + expected_files), + mocker.call("flake8", ("--max-line-length=88",) + expected_files), + mocker.call( + "isort", + ("-c", "-rc", "-tc", "-up", "-fgw=0", "-m=3", "-w=88") + expected_files, + ), ] assert python_call_mock.call_args_list == expected_calls From 1fca77d2c786cbb8e60e55ccdbf86fcb87fd606a Mon Sep 17 00:00:00 2001 From: Dmitrii Deriabin <44967953+DmitriiDeriabinQB@users.noreply.github.com> Date: Tue, 20 Oct 2020 16:32:59 +0100 Subject: [PATCH 7/9] Added spaceflights starter alias --- RELEASE.md | 1 + kedro/framework/cli/cli.py | 1 + 2 files changed, 2 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 68a53b4018..c3bad79143 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -20,6 +20,7 @@ * Improved documentation. * Fixed issue with saving a `TensorFlowModelDataset` in the HDF5 format with versioning enabled. * Added missing `run_result` argument in `after_pipeline_run` Hooks spec. +* Added [kedro-starter-spaceflights](https://github.com/quantumblacklabs/kedro-starter-spaceflights/) alias for generating a project: `kedro new --starter spaceflights`. ## Breaking changes to the API diff --git a/kedro/framework/cli/cli.py b/kedro/framework/cli/cli.py index 657ee913db..c12e4b576e 100644 --- a/kedro/framework/cli/cli.py +++ b/kedro/framework/cli/cli.py @@ -78,6 +78,7 @@ "pandas-iris": "git+https://github.com/quantumblacklabs/kedro-starter-pandas-iris.git", "pyspark": "git+https://github.com/quantumblacklabs/kedro-starter-pyspark.git", "pyspark-iris": "git+https://github.com/quantumblacklabs/kedro-starter-pyspark-iris.git", + "spaceflights": "git+https://github.com/quantumblacklabs/kedro-starter-spaceflights.git", } From f56ed169f26da796bbef683054b00ced83663003 Mon Sep 17 00:00:00 2001 From: Deepyaman Datta Date: Wed, 21 Oct 2020 04:12:27 -0400 Subject: [PATCH 8/9] Fix TypeError when passing dict to wrapped partial (#556) --- RELEASE.md | 1 + kedro/pipeline/node.py | 2 +- tests/pipeline/test_node.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index c3bad79143..fb8e32e0a6 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -13,6 +13,7 @@ ## Major features and improvements ## Bug fixes and other changes +* Fixed `TypeError` when converting dict inputs to a node made from a wrapped `partial` function. * Improved handling of non-ASCII word characters in dataset names. - For example, a dataset named `jalapeño` will be accessible as `DataCatalog.datasets.jalapeño` rather than `DataCatalog.datasets.jalape__o`. * Fixed `kedro install` for an Anaconda environment defined in `environment.yml`. diff --git a/kedro/pipeline/node.py b/kedro/pipeline/node.py index 6e21820ddd..4bf0df75c4 100644 --- a/kedro/pipeline/node.py +++ b/kedro/pipeline/node.py @@ -677,7 +677,7 @@ def _dict_inputs_to_list(func: Callable[[Any], Any], inputs: Dict[str, str]): """Convert a dict representation of the node inputs to a list , ensuring the appropriate order for binding them to the node's function. """ - sig = inspect.signature(func).bind(**inputs) + sig = inspect.signature(func, follow_wrapped=False).bind(**inputs) # for deterministic behavior in python 3.5, sort kwargs inputs alphabetically return list(sig.args) + sorted(sig.kwargs.values()) diff --git a/tests/pipeline/test_node.py b/tests/pipeline/test_node.py index 1d90c44931..d82e533365 100644 --- a/tests/pipeline/test_node.py +++ b/tests/pipeline/test_node.py @@ -464,3 +464,13 @@ def test_updated_partial(self): assert str(n) == "identity([in]) -> [out]" assert n.name == "identity([in]) -> [out]" assert n.short_name == "Identity" + + def test_updated_partial_dict_inputs(self): + n = node( + update_wrapper(partial(biconcat, input1=["in1"]), biconcat), + dict(input2="in2"), + ["out"], + ) + assert str(n) == "biconcat([in2]) -> [out]" + assert n.name == "biconcat([in2]) -> [out]" + assert n.short_name == "Biconcat" From 93058c3238e6c6a161a2e2d5fe87f292724d8017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorena=20B=C4=83lan?= Date: Wed, 21 Oct 2020 10:38:16 +0100 Subject: [PATCH 9/9] [KED-2171] Fix IPython init script bug causing hooks to be registered twice (#832) --- .../.ipython/profile_default/startup/00-kedro-init.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kedro/templates/project/{{ cookiecutter.repo_name }}/.ipython/profile_default/startup/00-kedro-init.py b/kedro/templates/project/{{ cookiecutter.repo_name }}/.ipython/profile_default/startup/00-kedro-init.py index a0f53d1584..dfa1781cd3 100644 --- a/kedro/templates/project/{{ cookiecutter.repo_name }}/.ipython/profile_default/startup/00-kedro-init.py +++ b/kedro/templates/project/{{ cookiecutter.repo_name }}/.ipython/profile_default/startup/00-kedro-init.py @@ -3,6 +3,7 @@ from pathlib import Path from IPython.core.magic import register_line_magic, needs_local_scope +from kedro.framework.hooks import get_hook_manager # Find the project root (./../../../) startup_error = None @@ -38,6 +39,13 @@ def reload_kedro(path, line=None): for module in to_remove: del sys.modules[module] + # clear hook manager; hook implementations will be re-registered when the + # context is instantiated again in `load_context()` below + hook_manager = get_hook_manager() + name_plugin_pairs = hook_manager.list_name_plugin() + for name, plugin in name_plugin_pairs: + hook_manager.unregister(name=name, plugin=plugin) + logging.debug("Loading the context from %s", str(path)) # Reload context to fix `pickle` related error (it is unable to serialize reloaded objects) # Some details can be found here: