From b4ae279c0467bf633006bfe059b936e5a6b34763 Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Mon, 7 Oct 2024 18:33:28 +0100 Subject: [PATCH 1/4] Removed add_data() method Signed-off-by: Elena Khaustova --- RELEASE.md | 2 +- kedro/io/kedro_data_catalog.py | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 61560acf87..c9a871b0e0 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,7 +4,7 @@ * Implemented `KedroDataCatalog` repeating `DataCatalog` functionality with a few API enhancements: * Removed `_FrozenDatasets` and access datasets as properties; * Added get dataset by name feature; - * `add_feed_dict()` was simplified and renamed to `add_data()`; + * `add_feed_dict()` was simplified to only add raw data; * Datasets' initialisation was moved out from `from_config()` method to the constructor. * Moved development requirements from `requirements.txt` to the dedicated section in `pyproject.toml` for project template. * Implemented `Protocol` abstraction for the current `DataCatalog` and adding new catalog implementations. 
diff --git a/kedro/io/kedro_data_catalog.py b/kedro/io/kedro_data_catalog.py index ce06e34aac..882eb19cf8 100644 --- a/kedro/io/kedro_data_catalog.py +++ b/kedro/io/kedro_data_catalog.py @@ -68,7 +68,7 @@ def __init__( self._add_from_config(ds_name, ds_config) if raw_data: - self.add_data(raw_data) + self.add_feed_dict(raw_data) @property def datasets(self) -> dict[str, Any]: @@ -304,16 +304,13 @@ def confirm(self, name: str) -> None: else: raise DatasetError(f"Dataset '{name}' does not have 'confirm' method") - def add_data(self, data: dict[str, Any], replace: bool = False) -> None: + def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> None: + # TODO: remove when removing old catalog # This method was simplified to add memory datasets only, since # adding AbstractDataset can be done via add() method - for ds_name, ds_data in data.items(): + for ds_name, ds_data in feed_dict.items(): self.add(ds_name, MemoryDataset(data=ds_data), replace) # type: ignore[abstract] - def add_feed_dict(self, feed_dict: dict[str, Any], replace: bool = False) -> None: - # TODO: remove when removing old catalog - return self.add_data(feed_dict, replace) - def shallow_copy( self, extra_dataset_patterns: Patterns | None = None ) -> KedroDataCatalog: From 5bdf16b4254be1e4db277d8955de98f7bc52f6dc Mon Sep 17 00:00:00 2001 From: Elena Khaustova Date: Mon, 7 Oct 2024 18:49:08 +0100 Subject: [PATCH 2/4] Added usage example and updated docstrings with experimental feature note Signed-off-by: Elena Khaustova --- RELEASE.md | 6 ++++-- kedro/io/kedro_data_catalog.py | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index c9a871b0e0..e80e7f112d 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -12,12 +12,14 @@ * Moved pattern resolution logic from `DataCatalog` to a separate component - `CatalogConfigResolver`. Updated `DataCatalog` to use `CatalogConfigResolver` internally. 
* Made packaged Kedro projects return `session.run()` output to be used when running it in the interactive environment. * Enhanced `OmegaConfigLoader` configuration validation to detect duplicate keys at all parameter levels, ensuring comprehensive nested key checking. + +**Note:** ``KedroDataCatalog`` is an experimental feature, so please mind possible breaking changes while using it. + ## Bug fixes and other changes * Fixed bug where using dataset factories breaks with `ThreadRunner`. * Fixed a bug where `SharedMemoryDataset.exists` would not call the underlying `MemoryDataset`. * Fixed template projects example tests. -* Made credentials loading consistent between `KedroContext._get_catalog()` and `resolve_patterns` so that both us -e `_get_config_credentials()` +* Made credentials loading consistent between `KedroContext._get_catalog()` and `resolve_patterns` so that both use `_get_config_credentials()` ## Breaking changes to the API * Removed `ShelveStore` to address a security vulnerability. diff --git a/kedro/io/kedro_data_catalog.py b/kedro/io/kedro_data_catalog.py index 882eb19cf8..f9cb40ae4c 100644 --- a/kedro/io/kedro_data_catalog.py +++ b/kedro/io/kedro_data_catalog.py @@ -3,6 +3,9 @@ use a ``KedroDataCatalog``, you need to instantiate it with a dictionary of datasets. Then it will act as a single point of reference for your calls, relaying load and save functions to the underlying datasets. + +``KedroDataCatalog`` is an experimental feature intended to replace ``DataCatalog`` in the future. +Expect possible breaking changes while using it. """ from __future__ import annotations @@ -44,6 +47,9 @@ def __init__( single point of reference for your calls, relaying load and save functions to the underlying datasets. + Note: ``KedroDataCatalog`` is an experimental feature, so please mind + possible breaking changes while using it. + Args: datasets: A dictionary of dataset names and dataset instances.
raw_data: A dictionary with data to be added in memory as `MemoryDataset`` instances. @@ -56,6 +62,13 @@ def __init__( case-insensitive string that conforms with operating system filename limitations, b) always return the latest version when sorted in lexicographical order. + + Example: + :: + >>> # settings.py + >>> from kedro.io import KedroDataCatalog + >>> + >>> DATA_CATALOG_CLASS = KedroDataCatalog """ self._config_resolver = config_resolver or CatalogConfigResolver() self._datasets = datasets or {} From a50fbc9af5fe7302b6cc60f967195b03647de935 Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:46:20 +0100 Subject: [PATCH 3/4] Update RELEASE.md Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Signed-off-by: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index e80e7f112d..19621ea499 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -13,7 +13,7 @@ * Made packaged Kedro projects return `session.run()` output to be used when running it in the interactive environment. * Enhanced `OmegaConfigLoader` configuration validation to detect duplicate keys at all parameter levels, ensuring comprehensive nested key checking. -**Note:** ``KedroDataCatalog`` is an experimental feature, so please mind possible breaking changes while using it. +**Note:** ``KedroDataCatalog`` is an experimental feature and is under active development. Therefore, it is possible we'll introduce breaking changes to this class, so be mindful of that if you decide to use it already. Let us know if you have any feedback about the ``KedroDataCatalog`` or ideas for new features. ## Bug fixes and other changes * Fixed bug where using dataset factories breaks with `ThreadRunner`. 
From 5b02d05de08b80b8aa5772116c3beada2b8598be Mon Sep 17 00:00:00 2001 From: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:46:36 +0100 Subject: [PATCH 4/4] Update kedro/io/kedro_data_catalog.py Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Signed-off-by: ElenaKhaustova <157851531+ElenaKhaustova@users.noreply.github.com> --- kedro/io/kedro_data_catalog.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kedro/io/kedro_data_catalog.py b/kedro/io/kedro_data_catalog.py index f9cb40ae4c..d07de8151a 100644 --- a/kedro/io/kedro_data_catalog.py +++ b/kedro/io/kedro_data_catalog.py @@ -47,8 +47,7 @@ def __init__( single point of reference for your calls, relaying load and save functions to the underlying datasets. - Note: ``KedroDataCatalog`` is an experimental feature, so please mind - possible breaking changes while using it. + Note: ``KedroDataCatalog`` is an experimental feature and is under active development. Therefore, it is possible we'll introduce breaking changes to this class, so be mindful of that if you decide to use it already. Args: datasets: A dictionary of dataset names and dataset instances.