diff --git a/RELEASE.md b/RELEASE.md
index 00cbffeb90..7653b29fd4 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -13,6 +13,7 @@
 ## Major features and improvements
 * Added dataset factories feature which uses pattern matching to reduce the number of catalog entries.
 * Activated all built-in resolvers by default for `OmegaConfigLoader` except for `oc.env`.
+* Added `kedro catalog factory list` CLI command.
 
 ## Bug fixes and other changes
 * Updated `kedro catalog list` to work with dataset factories.
diff --git a/kedro/framework/cli/catalog.py b/kedro/framework/cli/catalog.py
index 2d74b8d4b3..2cc4769775 100644
--- a/kedro/framework/cli/catalog.py
+++ b/kedro/framework/cli/catalog.py
@@ -179,6 +179,26 @@ def create_catalog(metadata: ProjectMetadata, pipeline_name, env):
         click.echo("All datasets are already configured.")
 
 
+@catalog.group()
+def factory():
+    """Commands for working with catalog dataset factories"""
+
+
+@factory.command("list")
+@env_option
+@click.pass_obj
+def list_factories(metadata: ProjectMetadata, env):
+    """Show all dataset factories in the catalog, ranked by the priority with which they are matched."""
+    session = _create_session(metadata.package_name, env=env)
+    context = session.load_context()
+
+    catalog_factories = context.catalog._dataset_patterns
+    if catalog_factories:
+        click.echo(yaml.dump(list(catalog_factories.keys())))
+    else:
+        click.echo("There are no dataset factories in the catalog.")
+
+
 def _add_missing_datasets_to_catalog(missing_ds, catalog_path):
     if catalog_path.is_file():
         catalog_config = yaml.safe_load(catalog_path.read_text()) or {}
diff --git a/tests/framework/cli/test_catalog.py b/tests/framework/cli/test_catalog.py
index ca5d4a7a55..5ed8c3d370 100644
--- a/tests/framework/cli/test_catalog.py
+++ b/tests/framework/cli/test_catalog.py
@@ -42,6 +42,33 @@ def fake_catalog_config():
     return config
 
 
+@pytest.fixture
+def fake_catalog_with_overlapping_factories():
+    config = {
+        "an_example_dataset": {
+            "type": "pandas.CSVDataSet",
+            "filepath": "dummy_filepath",
+        },
+        "an_example_{placeholder}": {
+            "type": "dummy_type",
+            "filepath": "dummy_filepath",
+        },
+        "an_example_{place}_{holder}": {
+            "type": "dummy_type",
+            "filepath": "dummy_filepath",
+        },
+        "on_{example_placeholder}": {
+            "type": "dummy_type",
+            "filepath": "dummy_filepath",
+        },
+        "an_{example_placeholder}": {
+            "type": "dummy_type",
+            "filepath": "dummy_filepath",
+        },
+    }
+    return config
+
+
 @pytest.mark.usefixtures(
     "chdir_to_dummy_project", "fake_load_context", "mock_pipelines"
 )
@@ -360,3 +387,60 @@ def test_bad_env(self, fake_project_cli, fake_metadata):
 
         assert result.exit_code
         assert "Unable to instantiate Kedro session" in result.output
+
+
+@pytest.mark.usefixtures(
+    "chdir_to_dummy_project", "fake_load_context", "mock_pipelines"
+)
+def test_list_catalog_factories(
+    fake_project_cli,
+    fake_metadata,
+    mocker,
+    fake_load_context,
+    fake_catalog_with_overlapping_factories,
+):
+    yaml_dump_mock = mocker.patch("yaml.dump", return_value="Result YAML")
+    mocked_context = fake_load_context.return_value
+    mocked_context.catalog = DataCatalog.from_config(
+        fake_catalog_with_overlapping_factories
+    )
+
+    result = CliRunner().invoke(
+        fake_project_cli, ["catalog", "factory", "list"], obj=fake_metadata
+    )
+    assert not result.exit_code
+
+    expected_patterns_sorted = [
+        "an_example_{place}_{holder}",
+        "an_example_{placeholder}",
+        "an_{example_placeholder}",
+        "on_{example_placeholder}",
+    ]
+
+    assert yaml_dump_mock.call_count == 1
+    assert yaml_dump_mock.call_args[0][0] == expected_patterns_sorted
+
+
+@pytest.mark.usefixtures(
+    "chdir_to_dummy_project",
+    "fake_load_context",
+)
+def test_list_factories_with_no_factories(
+    fake_project_cli, fake_metadata, fake_load_context
+):
+    mocked_context = fake_load_context.return_value
+
+    catalog_data_sets = {
+        "iris_data": CSVDataSet("test.csv"),
+        "intermediate": MemoryDataset(),
+        "not_used": CSVDataSet("test2.csv"),
+    }
+    mocked_context.catalog = DataCatalog(data_sets=catalog_data_sets)
+
+    result = CliRunner().invoke(
+        fake_project_cli, ["catalog", "factory", "list"], obj=fake_metadata
+    )
+
+    assert not result.exit_code
+    expected_output = "There are no dataset factories in the catalog."
+    assert expected_output in result.output
diff --git a/tests/tools/test_cli.py b/tests/tools/test_cli.py
index 1b80ad8064..bc21a313f9 100644
--- a/tests/tools/test_cli.py
+++ b/tests/tools/test_cli.py
@@ -139,17 +139,18 @@ def test_get_cli_structure_help(self, mocker, fake_metadata):
         assert isinstance(help_cli_structure, dict)
         assert isinstance(help_cli_structure["kedro"], dict)
 
-        for k, v in help_cli_structure["kedro"].items():
-            assert isinstance(k, str)
-            if isinstance(v, dict):
-                for sub_key in v:
-                    assert isinstance(help_cli_structure["kedro"][k][sub_key], str)
-                    assert help_cli_structure["kedro"][k][sub_key].startswith(
-                        "Usage: [OPTIONS]"
-                    )
-            elif isinstance(v, str):
-                assert v.startswith("Usage: [OPTIONS]")
+        self.recursively_check_cli_structure(help_cli_structure["kedro"])
 
         assert sorted(list(help_cli_structure["kedro"])) == sorted(
             DEFAULT_KEDRO_COMMANDS
         )
+
+    def recursively_check_cli_structure(self, structure):
+        for k, v in structure.items():
+            assert isinstance(k, str)
+            if isinstance(v, str):
+                assert v.startswith("Usage: [OPTIONS]")
+            elif isinstance(v, dict):
+                self.recursively_check_cli_structure(v)
+            else:  # Should never be reached
+                pytest.fail()
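
Note (not part of the diff): the ordering asserted in `expected_patterns_sorted` reflects how the catalog ranks factory patterns by specificity before matching. Below is a minimal sketch of such a ranking; the exact key used by `DataCatalog` for `_dataset_patterns` is an assumption here (more literal characters outside the braces first, then fewer placeholders, then alphabetical), but it reproduces the order the new test expects.

import re


def _specificity_key(pattern: str):
    # Hypothetical ranking key: more characters outside "{}" first,
    # then fewer placeholders, then alphabetical order.
    placeholders = re.findall(r"\{.*?\}", pattern)
    literal_chars = len(re.sub(r"\{.*?\}", "", pattern))
    return (-literal_chars, len(placeholders), pattern)


patterns = [
    "an_example_{placeholder}",
    "an_example_{place}_{holder}",
    "on_{example_placeholder}",
    "an_{example_placeholder}",
]

# Matches expected_patterns_sorted in test_list_catalog_factories:
# ['an_example_{place}_{holder}', 'an_example_{placeholder}',
#  'an_{example_placeholder}', 'on_{example_placeholder}']
print(sorted(patterns, key=_specificity_key))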