Merge branch 'master' into feature/partition-versioning
deepyaman authored Oct 21, 2020
2 parents cc50803 + 93058c3 commit a67c808
Showing 11 changed files with 74 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -75,6 +75,7 @@ htmlcov/
 .coverage.*
 .cache
 nosetests.xml
+test-output.xml
 coverage.xml
 *.cover
 .hypothesis/
3 changes: 2 additions & 1 deletion README.md
@@ -142,11 +142,12 @@ There is a growing community around Kedro. Have a look at the [Kedro FAQs](https
 - [MercadoLibre Argentina](https://www.mercadolibre.com.ar)
 - [Mosaic Data Science](https://www.youtube.com/watch?v=fCWGevB366g)
 - [NaranjaX](https://www.youtube.com/watch?v=_0kMmRfltEQ)
-- [Open Data Science LatAm](https://www.odsla.org/)
+- [Open Data Science LatAm](https://www.odesla.org/)
 - [Retrieva](https://tech.retrieva.jp/entry/2020/07/28/181414)
 - [Roche](https://www.roche.com/)
 - [UrbanLogiq](https://urbanlogiq.com/)
 - [XP](https://youtu.be/wgnGOVNkXqU?t=2210)
+- [Dendra Systems](https://www.dendra.io/)
 
 ## What licence do you use?
 
6 changes: 5 additions & 1 deletion RELEASE.md
@@ -13,16 +13,20 @@
 ## Major features and improvements
 
 ## Bug fixes and other changes
+* Fixed `TypeError` when converting dict inputs to a node made from a wrapped `partial` function.
 * Improved handling of non-ASCII word characters in dataset names.
   - For example, a dataset named `jalapeño` will be accessible as `DataCatalog.datasets.jalapeño` rather than `DataCatalog.datasets.jalape__o`.
 * Fixed `kedro install` for an Anaconda environment defined in `environment.yml`.
 * Fixed backwards compatibility with templates generated with older Kedro versions <0.16.5. No longer need to update `.kedro.yml` to use `kedro lint` and `kedro jupyter notebook convert`.
 * Improved documentation.
+* Fixed issue with saving a `TensorFlowModelDataset` in the HDF5 format with versioning enabled.
+* Added missing `run_result` argument in `after_pipeline_run` Hooks spec.
+* Added [kedro-starter-spaceflights](https://github.com/quantumblacklabs/kedro-starter-spaceflights/) alias for generating a project: `kedro new --starter spaceflights`.
 
 ## Breaking changes to the API
 
 ## Thanks for supporting contributions
-[Deepyaman Datta](https://github.com/deepyaman), [Bhavya Merchant](https://github.com/bnmerchant), [Lovkush Agarwal](https://github.com/Lovkush-A), [Varun Krishna S](https://github.com/vhawk19), [Sebastian Bertoli](https://github.com/sebastianbertoli)
+[Deepyaman Datta](https://github.com/deepyaman), [Bhavya Merchant](https://github.com/bnmerchant), [Lovkush Agarwal](https://github.com/Lovkush-A), [Varun Krishna S](https://github.com/vhawk19), [Sebastian Bertoli](https://github.com/sebastianbertoli), [Daniel Petti](https://github.com/djpetti)
 
 # Release 0.16.5
 
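The `jalapeño` bullet in the notes above concerns attribute access on `DataCatalog.datasets`. A minimal sketch of the described behaviour, assuming the `DataCatalog` and `MemoryDataSet` APIs from this release line:

```python
from kedro.io import DataCatalog, MemoryDataSet

# With the improved name handling, the non-ASCII dataset name survives intact
# instead of being mangled to `jalape__o`.
catalog = DataCatalog({"jalapeño": MemoryDataSet([1, 2, 3])})
print(catalog.datasets.jalapeño.load())  # [1, 2, 3]
```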
6 changes: 5 additions & 1 deletion kedro/extras/datasets/tensorflow/tensorflow_model_dataset.py
@@ -31,7 +31,7 @@
 """
 import copy
 import tempfile
-from pathlib import PurePath, PurePosixPath
+from pathlib import Path, PurePath, PurePosixPath
 from typing import Any, Dict
 
 import fsspec
@@ -151,6 +151,10 @@ def _load(self) -> tf.keras.Model:
     def _save(self, data: tf.keras.Model) -> None:
         save_path = get_filepath_str(self._get_save_path(), self._protocol)
 
+        # Make sure all intermediate directories are created.
+        save_dir = Path(save_path).parent
+        save_dir.mkdir(parents=True, exist_ok=True)
+
         with tempfile.TemporaryDirectory(prefix=self._tmp_prefix) as path:
             if self._is_h5:
                 path = str(PurePath(path) / TEMPORARY_H5_FILE)
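For context on the versioning fix above: a versioned save path gains a timestamped subdirectory that may not exist on the first save. A minimal sketch of the directory-creation idiom the patch relies on (the path below is hypothetical):

```python
from pathlib import Path

# A versioned dataset writes to <filepath>/<version>/<filename>; the version
# directory is new on every save, so create the whole chain idempotently.
save_path = Path("data/06_models/model.h5/2020-10-21T00.00.00.000Z/model.h5")
save_path.parent.mkdir(parents=True, exist_ok=True)  # no error if it exists
```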
1 change: 1 addition & 0 deletions kedro/framework/cli/cli.py
@@ -78,6 +78,7 @@
     "pandas-iris": "git+https://github.com/quantumblacklabs/kedro-starter-pandas-iris.git",
     "pyspark": "git+https://github.com/quantumblacklabs/kedro-starter-pyspark.git",
     "pyspark-iris": "git+https://github.com/quantumblacklabs/kedro-starter-pyspark-iris.git",
+    "spaceflights": "git+https://github.com/quantumblacklabs/kedro-starter-spaceflights.git",
 }
 
7 changes: 6 additions & 1 deletion kedro/framework/hooks/specs.py
@@ -190,7 +190,11 @@ def before_pipeline_run(
 
     @hook_spec
     def after_pipeline_run(
-        self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
+        self,
+        run_params: Dict[str, Any],
+        run_result: Dict[str, Any],
+        pipeline: Pipeline,
+        catalog: DataCatalog,
     ) -> None:
         """Hook to be invoked after a pipeline runs.
@@ -213,6 +217,7 @@ def after_pipeline_run(
                     "extra_params": Optional[Dict[str, Any]]
                 }
+            run_result: The output of ``Pipeline`` run.
             pipeline: The ``Pipeline`` that was run.
             catalog: The ``DataCatalog`` used during the run.
         """
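A sketch of a plugin-side implementation that picks up the new argument; the class name and the body are illustrative, but the signature mirrors the spec above:

```python
from typing import Any, Dict

from kedro.framework.hooks import hook_impl
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline


class MyHooks:
    @hook_impl
    def after_pipeline_run(
        self,
        run_params: Dict[str, Any],
        run_result: Dict[str, Any],
        pipeline: Pipeline,
        catalog: DataCatalog,
    ) -> None:
        # `run_result` carries the outputs of the `Pipeline` run.
        print("Run produced outputs:", list(run_result))
```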
2 changes: 1 addition & 1 deletion kedro/pipeline/node.py
@@ -677,7 +677,7 @@ def _dict_inputs_to_list(func: Callable[[Any], Any], inputs: Dict[str, str]):
     """Convert a dict representation of the node inputs to a list, ensuring
     the appropriate order for binding them to the node's function.
     """
-    sig = inspect.signature(func).bind(**inputs)
+    sig = inspect.signature(func, follow_wrapped=False).bind(**inputs)
     # for deterministic behavior in python 3.5, sort kwargs inputs alphabetically
     return list(sig.args) + sorted(sig.kwargs.values())
 
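Why `follow_wrapped=False` fixes the `TypeError` noted in the release notes — a standalone sketch using only the standard library. `update_wrapper` sets `__wrapped__` on the partial, and by default `inspect.signature` follows it back to the unbound function, whose signature still requires the already-bound argument:

```python
import inspect
from functools import partial, update_wrapper


def biconcat(input1, input2):
    return input1 + input2


func = update_wrapper(partial(biconcat, input1=["in1"]), biconcat)

try:
    # Follows `__wrapped__` to `biconcat`, which still requires `input1`.
    inspect.signature(func).bind(input2="in2")
except TypeError as exc:
    print(exc)  # missing a required argument: 'input1'

# The partial's own signature already has `input1` bound, so this succeeds.
bound = inspect.signature(func, follow_wrapped=False).bind(input2="in2")
print(bound.kwargs)  # {'input2': 'in2'}
```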
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 from IPython.core.magic import register_line_magic, needs_local_scope
+from kedro.framework.hooks import get_hook_manager
 
 # Find the project root (./../../../)
 startup_error = None
@@ -38,6 +39,13 @@ def reload_kedro(path, line=None):
         for module in to_remove:
             del sys.modules[module]
 
+        # clear hook manager; hook implementations will be re-registered when the
+        # context is instantiated again in `load_context()` below
+        hook_manager = get_hook_manager()
+        name_plugin_pairs = hook_manager.list_name_plugin()
+        for name, plugin in name_plugin_pairs:
+            hook_manager.unregister(name=name, plugin=plugin)
+
         logging.debug("Loading the context from %s", str(path))
         # Reload context to fix `pickle` related error (it is unable to serialize reloaded objects)
         # Some details can be found here:
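The unregister loop above exists because registering the same plugin object twice is an error. A sketch of the underlying behaviour, assuming pluggy (the library behind Kedro's hook manager):

```python
import pluggy

pm = pluggy.PluginManager("example")  # arbitrary project name


class MyHooks:
    pass


plugin = MyHooks()
pm.register(plugin)
try:
    pm.register(plugin)  # same object again
except ValueError as exc:
    print(exc)  # Plugin already registered

# Mirroring the startup script: unregister everything so that reloading the
# context can register its hook implementations from a clean slate.
for name, registered in pm.list_name_plugin():
    pm.unregister(name=name, plugin=registered)
```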
24 changes: 24 additions & 0 deletions tests/extras/datasets/tensorflow/test_tensorflow_model_dataset.py
@@ -317,6 +317,30 @@ def test_save_and_load(
         new_predictions = reloaded.predict(dummy_x_test)
         np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6)
 
+    def test_hdf5_save_format(
+        self,
+        dummy_tf_base_model,
+        dummy_x_test,
+        filepath,
+        tensorflow_model_dataset,
+        load_version,
+        save_version,
+    ):
+        """Test versioned TensorflowModelDataset can save TF graph models in
+        HDF5 format"""
+        hdf5_dataset = tensorflow_model_dataset(
+            filepath=filepath,
+            save_args={"save_format": "h5"},
+            version=Version(load_version, save_version),
+        )
+
+        predictions = dummy_tf_base_model.predict(dummy_x_test)
+        hdf5_dataset.save(dummy_tf_base_model)
+
+        reloaded = hdf5_dataset.load()
+        new_predictions = reloaded.predict(dummy_x_test)
+        np.testing.assert_allclose(predictions, new_predictions, rtol=1e-6, atol=1e-6)
+
     def test_prevent_overwrite(self, dummy_tf_base_model, versioned_tf_model_dataset):
         """Check the error when attempting to override the data set if the
         corresponding file for a given save version already exists."""
13 changes: 11 additions & 2 deletions tests/framework/hooks/test_context_hooks.py
@@ -262,11 +262,20 @@ def before_pipeline_run(
 
     @hook_impl
     def after_pipeline_run(
-        self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
+        self,
+        run_params: Dict[str, Any],
+        run_result: Dict[str, Any],
+        pipeline: Pipeline,
+        catalog: DataCatalog,
     ) -> None:
         self.logger.info(
             "Ran pipeline",
-            extra={"pipeline": pipeline, "run_params": run_params, "catalog": catalog},
+            extra={
+                "pipeline": pipeline,
+                "run_params": run_params,
+                "run_result": run_result,
+                "catalog": catalog,
+            },
         )
 
     @hook_impl
10 changes: 10 additions & 0 deletions tests/pipeline/test_node.py
@@ -464,3 +464,13 @@ def test_updated_partial(self):
         assert str(n) == "identity([in]) -> [out]"
         assert n.name == "identity([in]) -> [out]"
         assert n.short_name == "Identity"
+
+    def test_updated_partial_dict_inputs(self):
+        n = node(
+            update_wrapper(partial(biconcat, input1=["in1"]), biconcat),
+            dict(input2="in2"),
+            ["out"],
+        )
+        assert str(n) == "biconcat([in2]) -> [out]"
+        assert n.name == "biconcat([in2]) -> [out]"
+        assert n.short_name == "Biconcat"