@classmethod
def _validate_pattern_config(cls, ds_name: str, ds_config: dict[str, Any]) -> None:
    """Check that a dataset factory pattern configuration is valid.

    A configuration is valid when every placeholder used anywhere in it
    (at any nesting depth of dicts, lists and tuples) is also present in the
    dataset factory pattern name.

    Placeholders are compared by field name, so a format spec or conversion
    (``{name:d}``, ``{name!r}``) does not make two references to the same
    field look different, and escaped literal braces (``{{`` / ``}}``) are
    not treated as placeholders.

    Args:
        ds_name: Dataset factory pattern name.
        ds_config: Dataset pattern configuration.

    Raises:
        DatasetError: When keys used in the configuration are not present in
            the dataset factory pattern name.

    """
    # Imported locally so the block does not rely on a new module-level import.
    from string import Formatter

    # Matches every ``{...}`` occurrence including the brackets; only used as
    # a fallback for strings that are not well-formed format strings.
    raw_placeholder_regex = r"\{.*?\}"

    def _placeholders(text: str) -> set[str]:
        """Return the placeholders of ``text`` normalised to ``{field}``."""
        try:
            # ``parse`` is lazy, so it has to be consumed inside the ``try``.
            field_names = [
                field_name
                for _, field_name, _, _ in Formatter().parse(text)
                if field_name is not None
            ]
        except ValueError:
            # Malformed format string (e.g. a single unmatched brace): keep
            # the plain textual matching instead of failing here.
            return set(re.findall(raw_placeholder_regex, text))
        # Only the leading name is looked up when formatting, so drop any
        # attribute or index access that follows it (``{a.b}``, ``{a[0]}``).
        return {
            "{" + re.split(r"[.\[]", field_name, maxsplit=1)[0] + "}"
            for field_name in field_names
        }

    name_placeholders = _placeholders(ds_name)
    config_placeholders: set[str] = set()

    def _traverse_config(config: Any) -> None:
        """Collect placeholders from every string nested inside ``config``."""
        if isinstance(config, dict):
            for value in config.values():
                _traverse_config(value)
        elif isinstance(config, (list, tuple)):
            for value in config:
                _traverse_config(value)
        # Same guard as ``_resolve_dataset_config``: only strings containing
        # "}" are formatted, so only those can reference a placeholder.
        elif isinstance(config, str) and "}" in config:
            config_placeholders.update(_placeholders(config))

    _traverse_config(ds_config)

    missing_placeholders = config_placeholders - name_placeholders
    if missing_placeholders:
        raise DatasetError(
            f"Incorrect dataset configuration provided. "
            f"Keys used in the configuration {missing_placeholders} "
            f"should present in the dataset factory pattern name {ds_name}."
        )