Skip to content

Commit

Permalink
Update kedro catalog create to use new /conf structure (#2884)
Browse files Browse the repository at this point in the history
* modify test_catalog, remove nesting in the catalog folder

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* move catalog foldername to the yml filename, remove nesting

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* Update RELEASE.md

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

* Modify docs, fix broken link

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>

---------

Signed-off-by: Dmitry Sorokin <dmd40in@gmail.com>
  • Loading branch information
DimedS authored Aug 8, 2023
1 parent f253ba8 commit d768243
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 15 deletions.
2 changes: 1 addition & 1 deletion RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* Allowed registering of custom resolvers to `OmegaConfigLoader` through `CONFIG_LOADER_ARGS`.

## Bug fixes and other changes
* Updated `kedro pipeline create` to use new `/conf` file structure.
* Updated `kedro pipeline create` and `kedro catalog create` to use new `/conf` file structure.

## Documentation changes

Expand Down
2 changes: 1 addition & 1 deletion docs/source/configuration/configuration_basics.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ Kedro merges configuration information and returns a configuration dictionary ac
* If any two configuration files located inside the **same** environment path (such as `conf/base/`) contain the same top-level key, the configuration loader raises a `ValueError` indicating that duplicates are not allowed.
* If two configuration files contain the same top-level key but are in **different** environment paths (for example, one in `conf/base/`, another in `conf/local/`), then the last loaded path (`conf/local/`) takes precedence as the key value. `ConfigLoader.get` does not raise any errors, but a `DEBUG`-level log message is emitted with information on the overridden keys.

When using any of the configuration loaders, any top-level keys that start with `_` are considered hidden (or reserved) and are ignored. Those keys will neither trigger a key duplication error nor appear in the resulting configuration dictionary. However, you can still use such keys, for example, as [YAML anchors and aliases](https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet#anchors)
When using any of the configuration loaders, any top-level keys that start with `_` are considered hidden (or reserved) and are ignored. Those keys will neither trigger a key duplication error nor appear in the resulting configuration dictionary. However, you can still use such keys, for example, as [YAML anchors and aliases](https://www.educative.io/blog/advanced-yaml-syntax-cheatsheet)
or [to enable templating in the catalog when using the `OmegaConfigLoader`](advanced_configuration.md#how-to-do-templating-with-the-omegaconfigloader).

### Configuration file names
Expand Down
3 changes: 1 addition & 2 deletions kedro/framework/cli/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,7 @@ def create_catalog(metadata: ProjectMetadata, pipeline_name, env):
context.project_path
/ settings.CONF_SOURCE
/ env
/ "catalog"
/ f"{pipeline_name}.yml"
/ f"catalog_{pipeline_name}.yml"
)
_add_missing_datasets_to_catalog(missing_ds, catalog_path)
click.echo(f"Data Catalog YAML configuration was created: {catalog_path}")
Expand Down
19 changes: 8 additions & 11 deletions tests/framework/cli/test_catalog.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import shutil

import pytest
import yaml
from click.testing import CliRunner
Expand Down Expand Up @@ -242,11 +240,12 @@ class TestCatalogCreateCommand:
@staticmethod
@pytest.fixture(params=["base"])
def catalog_path(request, fake_repo_path):
catalog_path = fake_repo_path / "conf" / request.param / "catalog"
catalog_path = fake_repo_path / "conf" / request.param

yield catalog_path

shutil.rmtree(catalog_path, ignore_errors=True)
for file in catalog_path.glob("catalog_*"):
file.unlink()

def test_pipeline_argument_is_required(self, fake_project_cli):
result = CliRunner().invoke(fake_project_cli, ["catalog", "create"])
Expand Down Expand Up @@ -278,7 +277,7 @@ def test_catalog_is_created_in_base_by_default(
main_catalog_config = yaml.safe_load(main_catalog_path.read_text())
assert "example_iris_data" in main_catalog_config

data_catalog_file = catalog_path / f"{self.PIPELINE_NAME}.yml"
data_catalog_file = catalog_path / f"catalog_{self.PIPELINE_NAME}.yml"

result = CliRunner().invoke(
fake_project_cli,
Expand All @@ -302,9 +301,9 @@ def test_catalog_is_created_in_base_by_default(
def test_catalog_is_created_in_correct_env(
self, fake_project_cli, fake_metadata, catalog_path
):
data_catalog_file = catalog_path / f"{self.PIPELINE_NAME}.yml"
data_catalog_file = catalog_path / f"catalog_{self.PIPELINE_NAME}.yml"

env = catalog_path.parent.name
env = catalog_path.name
result = CliRunner().invoke(
fake_project_cli,
["catalog", "create", "--pipeline", self.PIPELINE_NAME, "--env", env],
Expand Down Expand Up @@ -335,7 +334,7 @@ def test_no_missing_datasets(
)

data_catalog_file = (
fake_repo_path / "conf" / "base" / "catalog" / f"{self.PIPELINE_NAME}.yml"
fake_repo_path / "conf" / "base" / f"catalog_{self.PIPELINE_NAME}.yml"
)

result = CliRunner().invoke(
Expand All @@ -351,9 +350,7 @@ def test_no_missing_datasets(
def test_missing_datasets_appended(
self, fake_project_cli, fake_metadata, catalog_path
):
data_catalog_file = catalog_path / f"{self.PIPELINE_NAME}.yml"
assert not catalog_path.exists()
catalog_path.mkdir()
data_catalog_file = catalog_path / f"catalog_{self.PIPELINE_NAME}.yml"

catalog_config = {
"example_test_x": {"type": "pandas.CSVDataSet", "filepath": "test.csv"}
Expand Down

0 comments on commit d768243

Please sign in to comment.