Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add kedro catalog factory list CLI command #2796

1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
## Major features and improvements
* Added dataset factories feature which uses pattern matching to reduce the number of catalog entries.
* Activated all built-in resolvers by default for `OmegaConfigLoader` except for `oc.env`.
* Added `kedro catalog factory list` CLI command.

## Bug fixes and other changes
* Updated `kedro catalog list` to work with dataset factories.
Expand Down
12 changes: 12 additions & 0 deletions kedro/framework/cli/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,18 @@ def create_catalog(metadata: ProjectMetadata, pipeline_name, env):
click.echo("All datasets are already configured.")


@catalog.command("factory list")
@env_option
@click.pass_obj
def list_patterns(metadata: ProjectMetadata, env):
    """Show all factory patterns in the catalog, ranked by priority by which they are matched."""
    # Build a session for the requested environment and load its context.
    project_context = _create_session(metadata.package_name, env=env).load_context()

    # Only the pattern names (the mapping's keys) are printed, in the order
    # in which the catalog stores them.
    pattern_names = list(project_context.catalog._dataset_patterns.keys())
    click.echo(yaml.dump(pattern_names))
SajidAlamQB marked this conversation as resolved.
Show resolved Hide resolved


def _add_missing_datasets_to_catalog(missing_ds, catalog_path):
if catalog_path.is_file():
catalog_config = yaml.safe_load(catalog_path.read_text()) or {}
Expand Down
59 changes: 59 additions & 0 deletions tests/framework/cli/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,33 @@ def fake_catalog_config():
return config


@pytest.fixture
def fake_catalog_with_overlapping_factories():
    """Return a catalog config with one explicit dataset and four factory patterns.

    The pattern names share prefixes, so a single dataset name can match more
    than one of them.
    """
    factory_patterns = (
        "an_example_{placeholder}",
        "an_example_{place}_{holder}",
        "on_{example_placeholder}",
        "an_{example_placeholder}",
    )

    # The explicit (non-pattern) entry comes first, as in the original literal.
    catalog_config = {
        "an_example_dataset": {
            "type": "pandas.CSVDataSet",
            "filepath": "dummy_filepath",
        },
    }
    for pattern in factory_patterns:
        # A fresh dict per pattern so the entries never share state.
        catalog_config[pattern] = {
            "type": "dummy_type",
            "filepath": "dummy_filepath",
        }
    return catalog_config


@pytest.mark.usefixtures(
"chdir_to_dummy_project", "fake_load_context", "mock_pipelines"
)
Expand Down Expand Up @@ -360,3 +387,35 @@ def test_bad_env(self, fake_project_cli, fake_metadata):

assert result.exit_code
assert "Unable to instantiate Kedro session" in result.output


@pytest.mark.usefixtures(
    "chdir_to_dummy_project", "fake_load_context", "mock_pipelines"
)
def test_list_catalog_factories(
    fake_project_cli,
    fake_metadata,
    mocker,
    fake_load_context,
    fake_catalog_with_overlapping_factories,
):
    """Check that ``catalog "factory list"`` dumps the factory patterns in the expected order."""
    # Intercept yaml.dump so the exact object handed to it can be inspected.
    dump_spy = mocker.patch("yaml.dump", return_value="Result YAML")

    # Give the mocked context a real catalog built from the overlapping config.
    fake_load_context.return_value.catalog = DataCatalog.from_config(
        fake_catalog_with_overlapping_factories
    )

    cli_result = CliRunner().invoke(
        fake_project_cli, ["catalog", "factory list"], obj=fake_metadata
    )
    assert not cli_result.exit_code

    # The explicit "an_example_dataset" entry is not expected in the output;
    # only the four patterns are, in this order.
    expected_order = [
        "an_example_{place}_{holder}",
        "an_example_{placeholder}",
        "an_{example_placeholder}",
        "on_{example_placeholder}",
    ]

    assert dump_spy.call_count == 1
    dumped_patterns = dump_spy.call_args[0][0]
    assert dumped_patterns == expected_order