Skip to content

Commit

Permalink
Merge branch 'main' into pipeline-performance-test
Browse files Browse the repository at this point in the history
  • Loading branch information
lrcouto authored Sep 25, 2024
2 parents 28b938a + 7537eae commit f4fa341
Show file tree
Hide file tree
Showing 12 changed files with 1,162 additions and 148 deletions.
7 changes: 7 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Upcoming Release

## Major features and improvements
* Implemented `KedroDataCatalog`, which repeats the `DataCatalog` functionality with a few API enhancements:
  * Removed `_FrozenDatasets`; datasets are now accessed as properties;
  * Added the ability to get a dataset by name;
  * `add_feed_dict()` was simplified and renamed to `add_data()`;
  * Datasets' initialisation was moved out of the `from_config()` method and into the constructor.
* Moved development requirements from `requirements.txt` to the dedicated section in `pyproject.toml` for project template.
* Implemented a `Protocol` abstraction for the current `DataCatalog` and for adding new catalog implementations.
* Refactored `kedro run` and `kedro catalog` commands.
Expand All @@ -9,6 +14,7 @@
* Enhanced `OmegaConfigLoader` configuration validation to detect duplicate keys at all parameter levels, ensuring comprehensive nested key checking.
## Bug fixes and other changes
* Fixed bug where using dataset factories breaks with `ThreadRunner`.
* Fixed a bug where `SharedMemoryDataset.exists` would not call the underlying `MemoryDataset`.
* Fixed template projects example tests.
* Made credentials loading consistent between `KedroContext._get_catalog()` and `resolve_patterns` so that both use `_get_config_credentials()`
Expand All @@ -25,6 +31,7 @@ e `_get_config_credentials()`
* [ethanknights](https://github.com/ethanknights)
* [Manezki](https://github.com/Manezki)
* [MigQ2](https://github.com/MigQ2)
* [Felix Scherz](https://github.com/felixscherz)

# Release 0.19.8

Expand Down
2 changes: 2 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@
"kedro.io.core.DatasetError",
"kedro.io.core.Version",
"kedro.io.data_catalog.DataCatalog",
"kedro.io.kedro_data_catalog.KedroDataCatalog",
"kedro.io.memory_dataset.MemoryDataset",
"kedro.io.partitioned_dataset.PartitionedDataset",
"kedro.pipeline.pipeline.Pipeline",
Expand Down Expand Up @@ -172,6 +173,7 @@
"Patterns",
"CatalogConfigResolver",
"CatalogProtocol",
"KedroDataCatalog",
),
"py:data": (
"typing.Any",
Expand Down
6 changes: 2 additions & 4 deletions kedro/framework/cli/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,7 @@ def list_datasets(metadata: ProjectMetadata, pipeline: str, env: str) -> None:

for ds_name in default_ds:
if data_catalog.config_resolver.match_pattern(ds_name):
ds_config = data_catalog.config_resolver.resolve_dataset_pattern(
ds_name
)
ds_config = data_catalog.config_resolver.resolve_pattern(ds_name)
factory_ds_by_type[ds_config.get("type", "DefaultDataset")].append(
ds_name
)
Expand Down Expand Up @@ -250,7 +248,7 @@ def resolve_patterns(metadata: ProjectMetadata, env: str) -> None:
if ds_name in explicit_datasets or is_parameter(ds_name):
continue

ds_config = data_catalog.config_resolver.resolve_dataset_pattern(ds_name)
ds_config = data_catalog.config_resolver.resolve_pattern(ds_name)

# Exclude MemoryDatasets not set in the catalog explicitly
if ds_config:
Expand Down
2 changes: 2 additions & 0 deletions kedro/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Version,
)
from .data_catalog import DataCatalog
from .kedro_data_catalog import KedroDataCatalog
from .lambda_dataset import LambdaDataset
from .memory_dataset import MemoryDataset
from .shared_memory_dataset import SharedMemoryDataset
Expand All @@ -30,6 +31,7 @@
"DatasetAlreadyExistsError",
"DatasetError",
"DatasetNotFoundError",
"KedroDataCatalog",
"LambdaDataset",
"MemoryDataset",
"SharedMemoryDataset",
Expand Down
2 changes: 1 addition & 1 deletion kedro/io/catalog_config_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def _resolve_config_credentials(

return resolved_configs

def resolve_dataset_pattern(self, ds_name: str) -> dict[str, Any]:
def resolve_pattern(self, ds_name: str) -> dict[str, Any]:
"""Resolve dataset patterns and return resolved configurations based on the existing patterns."""
matched_pattern = self.match_pattern(ds_name)

Expand Down
4 changes: 2 additions & 2 deletions kedro/io/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __setattr__(self, key: str, value: Any) -> None:
if key == "_original_names":
super().__setattr__(key, value)
return
msg = "Operation not allowed! "
msg = "Operation not allowed. "
if key in self.__dict__:
msg += "Please change datasets through configuration."
else:
Expand Down Expand Up @@ -324,7 +324,7 @@ def _get_dataset(
version: Version | None = None,
suggest: bool = True,
) -> AbstractDataset:
ds_config = self._config_resolver.resolve_dataset_pattern(dataset_name)
ds_config = self._config_resolver.resolve_pattern(dataset_name)

if dataset_name not in self._datasets and ds_config:
ds = AbstractDataset.from_config(
Expand Down
Loading

0 comments on commit f4fa341

Please sign in to comment.