Skip to content

Commit

Permalink
Merge branch 'main' into pipeline-performance-test
Browse files Browse the repository at this point in the history
  • Loading branch information
lrcouto authored Sep 26, 2024
2 parents f4fa341 + d0ac061 commit d476736
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 11 deletions.
46 changes: 39 additions & 7 deletions kedro/io/catalog_config_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,43 @@ def _resolve_value(key: str, value: Any) -> Any:

return {k: _resolve_value(k, v) for k, v in config.items()}

@classmethod
def _validate_pattern_config(cls, ds_name: str, ds_config: dict[str, Any]) -> None:
    """Validate a dataset factory pattern configuration.

    Every ``{placeholder}`` that appears in a string anywhere inside the
    (possibly nested) configuration must also appear in the dataset factory
    pattern name.

    Args:
        ds_name: Dataset factory pattern name.
        ds_config: Dataset pattern configuration.

    Raises:
        DatasetError: When the configuration uses placeholders that are not
            present in the dataset factory pattern name.
    """
    # Non-greedy match of each "{...}" group, braces included.
    placeholder_re = re.compile(r"\{.*?\}")

    def _placeholder_strings(node: Any):
        # Depth-first walk over nested dicts / lists / tuples, yielding only
        # the strings that could contain a placeholder (i.e. contain "}").
        if isinstance(node, dict):
            for child in node.values():
                yield from _placeholder_strings(child)
        elif isinstance(node, (list, tuple)):
            for child in node:
                yield from _placeholder_strings(child)
        elif isinstance(node, str) and "}" in node:
            yield node

    allowed = set(placeholder_re.findall(ds_name))

    used = set()
    for text in _placeholder_strings(ds_config):
        used.update(set(placeholder_re.findall(text)))

    unknown = used - allowed
    if not unknown:
        return

    raise DatasetError(
        f"Incorrect dataset configuration provided. "
        f"Keys used in the configuration {unknown} "
        f"should present in the dataset factory pattern name {ds_name}."
    )

@classmethod
def _resolve_dataset_config(
cls,
Expand All @@ -147,13 +184,7 @@ def _resolve_dataset_config(
cls._resolve_dataset_config(ds_name, pattern, value) for value in config
]
elif isinstance(config, str) and "}" in config:
try:
config = config.format_map(resolved_vars.named)
except KeyError as exc:
raise DatasetError(
f"Unable to resolve '{config}' from the pattern '{pattern}'. Keys used in the configuration "
f"should be present in the dataset factory pattern."
) from exc
config = config.format_map(resolved_vars.named)
return config

def list_patterns(self) -> list[str]:
Expand Down Expand Up @@ -192,6 +223,7 @@ def _extract_patterns(

for ds_name, ds_config in config.items():
if cls.is_pattern(ds_name):
cls._validate_pattern_config(ds_name, ds_config)
dataset_patterns[ds_name] = cls._resolve_credentials(
ds_config, credentials
)
Expand Down
7 changes: 3 additions & 4 deletions tests/io/test_data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,13 +946,12 @@ def test_unmatched_key_error_when_parsing_config(
self, config_with_dataset_factories_bad_pattern
):
"""Check error raised when key mentioned in the config is not in pattern name"""
catalog = DataCatalog.from_config(**config_with_dataset_factories_bad_pattern)
pattern = (
"Unable to resolve 'data/01_raw/{brand}_plane.pq' from the pattern '{type}@planes'. "
"Keys used in the configuration should be present in the dataset factory pattern."
"Incorrect dataset configuration provided. Keys used in the configuration {'{brand}'} "
"should present in the dataset factory pattern name {type}@planes."
)
with pytest.raises(DatasetError, match=re.escape(pattern)):
catalog._get_dataset("jet@planes")
_ = DataCatalog.from_config(**config_with_dataset_factories_bad_pattern)

def test_factory_config_versioned(
self, config_with_dataset_factories, filepath, dummy_dataframe
Expand Down

0 comments on commit d476736

Please sign in to comment.