From c9fc80a2ba14d24910de904837b802db3f6c1283 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com> Date: Mon, 21 Aug 2023 16:06:15 +0100 Subject: [PATCH 1/2] Add globals feature for `OmegaConfigLoader` using a globals resolver (#2921) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Refactor load_and_merge_dir() Signed-off-by: Ankita Katiyar * Try adding globals resolver Signed-off-by: Ankita Katiyar * Minor change Signed-off-by: Ankita Katiyar * Add globals resolver Signed-off-by: Ankita Katiyar * Revert refactoring Signed-off-by: Ankita Katiyar * Add test + remove self.globals Signed-off-by: Ankita Katiyar * Allow for nested variables in globals Signed-off-by: Ankita Katiyar * Add documentation Signed-off-by: Ankita Katiyar * Typo Signed-off-by: Ankita Katiyar * Add error message + test Signed-off-by: Ankita Katiyar * Apply suggestions from code review Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> * Split test into multiple tests Signed-off-by: Ankita Katiyar * Restrict the globals config_patterns Signed-off-by: Ankita Katiyar * Release notes Signed-off-by: Ankita Katiyar * Update docs/source/configuration/advanced_configuration.md Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> * Add helpful error message for keys starting with _ Signed-off-by: Ankita Katiyar * Enable setting default value for globals resolver Signed-off-by: Ankita Katiyar * Typo Signed-off-by: Ankita Katiyar * Move test for keys starting with _ to the top Signed-off-by: Ankita Katiyar * Fix cross ref link in docs Signed-off-by: Ankita Katiyar --------- Signed-off-by: Ankita Katiyar Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Co-authored-by: Juan Luis Cano Rodríguez Co-authored-by: Nok Lam Chan --- RELEASE.md | 2 + .../configuration/advanced_configuration.md | 35 ++++- .../configuration/configuration_basics.md | 5 +- 
docs/source/faq/faq.md | 1 + kedro/config/omegaconf_config.py | 37 ++++- tests/config/test_omegaconf_config.py | 126 ++++++++++++++++++ 6 files changed, 201 insertions(+), 5 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 169485d03e..bd338addd5 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -14,6 +14,8 @@ * Allowed registering of custom resolvers to `OmegaConfigLoader` through `CONFIG_LOADER_ARGS`. * Added support for Python 3.11. This includes tackling challenges like dependency pinning and test adjustments to ensure a smooth experience. Detailed migration tips are provided below for further context. * Added `kedro catalog resolve` CLI command that resolves dataset factories in the catalog with any explicit entries in the project pipeline. +* Added support for global variables to `OmegaConfigLoader`. + ## Bug fixes and other changes * Updated `kedro pipeline create` and `kedro catalog create` to use new `/conf` file structure. diff --git a/docs/source/configuration/advanced_configuration.md b/docs/source/configuration/advanced_configuration.md index 8e703b5bc2..6bf78d487e 100644 --- a/docs/source/configuration/advanced_configuration.md +++ b/docs/source/configuration/advanced_configuration.md @@ -34,7 +34,7 @@ folders: fea: "04_feature" ``` -To point your `TemplatedConfigLoader` to the globals file, add it to the the `CONFIG_LOADER_ARGS` variable in [`src//settings.py`](../kedro_project_setup/settings.md): +To point your `TemplatedConfigLoader` to the globals file, add it to the `CONFIG_LOADER_ARGS` variable in [`src//settings.py`](../kedro_project_setup/settings.md): ```python CONFIG_LOADER_ARGS = {"globals_pattern": "*globals.yml"} @@ -124,6 +124,7 @@ This section contains a set of guidance for advanced configuration requirements * [How to bypass the configuration loading rules](#how-to-bypass-the-configuration-loading-rules) * [How to use Jinja2 syntax in configuration](#how-to-use-jinja2-syntax-in-configuration) * [How to do templating with the 
`OmegaConfigLoader`](#how-to-do-templating-with-the-omegaconfigloader) +* [How to use global variables with the `OmegaConfigLoader`](#how-to-use-global-variables-with-the-omegaconfigloader) * [How to use resolvers in the `OmegaConfigLoader`](#how-to-use-resolvers-in-the-omegaconfigloader) * [How to load credentials through environment variables](#how-to-load-credentials-through-environment-variables) @@ -262,6 +263,38 @@ Since both of the file names (`catalog.yml` and `catalog_globals.yml`) match the #### Other configuration files It's also possible to use variable interpolation in configuration files other than parameters and catalog, such as custom spark or mlflow configuration. This works in the same way as variable interpolation in parameter files. You can still use the underscore for the templated values if you want, but it's not mandatory like it is for catalog files. +### How to use global variables with the `OmegaConfigLoader` +From Kedro `0.18.13`, you can use variable interpolation in your configurations using "globals" with `OmegaConfigLoader`. +The benefit of using globals over regular variable interpolation is that the global variables are shared across different configuration types, such as catalog and parameters. +By default, these global variables are assumed to be in files called `globals.yml` in any of your environments. If you want to configure the naming patterns for the files that contain your global variables, +you can do so [by overwriting the `globals` key in `config_patterns`](#how-to-change-which-configuration-files-are-loaded). You can also [bypass the configuration loading](#how-to-bypass-the-configuration-loading-rules) +to directly set the global variables in `OmegaConfigLoader`. 
+ +Suppose you have global variables located in the file `conf/base/globals.yml`: +```yaml +my_global_value: 45 +dataset_type: + csv: pandas.CSVDataSet +``` +You can access these global variables in your catalog or parameters config files with a `globals` resolver like this: +`conf/base/parameters.yml`: +```yaml +my_param : "${globals:my_global_value}" +``` +`conf/base/catalog.yml`: +```yaml +companies: + filepath: data/01_raw/companies.csv + type: "${globals:dataset_type.csv}" +``` +You can also provide a default value to be used in case the global variable does not exist: +```yaml +my_param: "${globals: nonexistent_global, 23}" +``` +If there are duplicate keys in the globals files in your base and run time environments, the values in the run time environment +will overwrite the values in your base environment. + + ### How to use resolvers in the `OmegaConfigLoader` Instead of hard-coding values in your configuration files, you can also dynamically compute them using [`OmegaConf`'s resolvers functionality](https://omegaconf.readthedocs.io/en/2.3_branch/custom_resolvers.html#resolvers). You use resolvers to define custom diff --git a/docs/source/configuration/configuration_basics.md b/docs/source/configuration/configuration_basics.md index 2e964b512f..9e133f0e5e 100644 --- a/docs/source/configuration/configuration_basics.md +++ b/docs/source/configuration/configuration_basics.md @@ -61,7 +61,7 @@ Configuration files will be matched according to file name and type rules. Suppo ### Configuration patterns Under the hood, the Kedro configuration loader loads files based on regex patterns that specify the naming convention for configuration files. These patterns are specified by `config_patterns` in the configuration loader classes. 
-By default those patterns are set as follows for the configuration of catalog, parameters, logging and credentials: +By default those patterns are set as follows for the configuration of catalog, parameters, logging, credentials, and globals: ```python config_patterns = { @@ -69,10 +69,11 @@ config_patterns = { "parameters": ["parameters*", "parameters*/**", "**/parameters*"], "credentials": ["credentials*", "credentials*/**", "**/credentials*"], "logging": ["logging*", "logging*/**", "**/logging*"], + "globals": ["globals.yml"], } ``` -If you want to change change the way configuration is loaded, you can either [customise the config patterns](advanced_configuration.md#how-to-change-which-configuration-files-are-loaded) or [bypass the configuration loading](advanced_configuration.md#how-to-bypass-the-configuration-loading-rules) as described in the advanced configuration chapter. +If you want to change the way configuration is loaded, you can either [customise the config patterns](advanced_configuration.md#how-to-change-which-configuration-files-are-loaded) or [bypass the configuration loading](advanced_configuration.md#how-to-bypass-the-configuration-loading-rules) as described in the advanced configuration chapter. ## How to use Kedro configuration diff --git a/docs/source/faq/faq.md b/docs/source/faq/faq.md index 23cfa6b094..30bd2a1929 100644 --- a/docs/source/faq/faq.md +++ b/docs/source/faq/faq.md @@ -36,6 +36,7 @@ This is a growing set of technical FAQs. The [product FAQs on the Kedro website] * [How do I bypass the configuration loading rules](../configuration/advanced_configuration.md#how-to-bypass-the-configuration-loading-rules)? * [How do I use Jinja2 syntax in configuration](../configuration/advanced_configuration.md#how-to-use-jinja2-syntax-in-configuration)? * [How do I do templating with the `OmegaConfigLoader`](../configuration/advanced_configuration.md#how-to-do-templating-with-the-omegaconfigloader)?
+* [How do I use global variables with the `OmegaConfigLoader`](../configuration/advanced_configuration.md#how-to-use-global-variables-with-the-omegaconfigloader)? * [How do I use resolvers in the `OmegaConfigLoader`](../configuration/advanced_configuration.md#how-to-use-resolvers-in-the-omegaconfigloader)? * [How do I load credentials through environment variables](../configuration/advanced_configuration.md#how-to-load-credentials-through-environment-variables)? diff --git a/kedro/config/omegaconf_config.py b/kedro/config/omegaconf_config.py index 4d2ace59d4..0d7b5d740c 100644 --- a/kedro/config/omegaconf_config.py +++ b/kedro/config/omegaconf_config.py @@ -11,6 +11,7 @@ import fsspec from omegaconf import OmegaConf +from omegaconf.errors import InterpolationResolutionError from omegaconf.resolvers import oc from yaml.parser import ParserError from yaml.scanner import ScannerError @@ -109,6 +110,7 @@ def __init__( # noqa: too-many-arguments "parameters": ["parameters*", "parameters*/**", "**/parameters*"], "credentials": ["credentials*", "credentials*/**", "**/credentials*"], "logging": ["logging*", "logging*/**", "**/logging*"], + "globals": ["globals.yml"], } self.config_patterns.update(config_patterns or {}) @@ -117,7 +119,8 @@ def __init__( # noqa: too-many-arguments # Register user provided custom resolvers if custom_resolvers: self._register_new_resolvers(custom_resolvers) - + # Register globals resolver + self._register_globals_resolver() file_mimetype, _ = mimetypes.guess_type(conf_source) if file_mimetype == "application/x-tar": self._protocol = "tar" @@ -199,7 +202,7 @@ def __getitem__(self, key) -> dict[str, Any]: config.update(env_config) - if not processed_files: + if not processed_files and key != "globals": raise MissingConfigException( f"No files of YAML or JSON format found in {base_path} or {env_path} matching" f" the glob pattern(s): {[*self.config_patterns[key]]}" @@ -308,6 +311,36 @@ def _is_valid_config_path(self, path): ".json", ] + def 
_register_globals_resolver(self): + """Register the globals resolver""" + OmegaConf.register_new_resolver( + "globals", + lambda variable, default_value=None: self._get_globals_value( + variable, default_value + ), + replace=True, + ) + + def _get_globals_value(self, variable, default_value): + """Return the globals values to the resolver""" + if variable.startswith("_"): + raise InterpolationResolutionError( + "Keys starting with '_' are not supported for globals." + ) + keys = variable.split(".") + value = self["globals"] + for k in keys: + value = value.get(k) + if not value: + if default_value: + _config_logger.debug( + f"Using the default value for the global variable {variable}." + ) + return default_value + msg = f"Globals key '{variable}' not found and no default value provided. " + raise InterpolationResolutionError(msg) + return value + @staticmethod def _register_new_resolvers(resolvers: dict[str, Callable]): """Register custom resolvers""" diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index af57b52224..948aac7372 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -12,6 +12,7 @@ import pytest import yaml from omegaconf import OmegaConf, errors +from omegaconf.errors import InterpolationResolutionError from omegaconf.resolvers import oc from yaml.parser import ParserError @@ -671,3 +672,128 @@ def test_custom_resolvers(self, tmp_path): assert conf["parameters"]["model_options"]["param1"] == 7 assert conf["parameters"]["model_options"]["param2"] == 3 assert conf["parameters"]["model_options"]["param3"] == "my_env_variable" + + def test_globals(self, tmp_path): + globals_params = tmp_path / _BASE_ENV / "globals.yml" + globals_config = { + "x": 34, + } + _write_yaml(globals_params, globals_config) + conf = OmegaConfigLoader(tmp_path, default_run_env="") + # OmegaConfigLoader has globals resolver + assert OmegaConf.has_resolver("globals") + # Globals is readable in a 
dict way + assert conf["globals"] == globals_config + + def test_globals_resolution(self, tmp_path): + base_params = tmp_path / _BASE_ENV / "parameters.yml" + base_catalog = tmp_path / _BASE_ENV / "catalog.yml" + globals_params = tmp_path / _BASE_ENV / "globals.yml" + param_config = { + "my_param": "${globals:x}", + "my_param_default": "${globals:y,34}", # y does not exist in globals + } + catalog_config = { + "companies": { + "type": "${globals:dataset_type}", + "filepath": "data/01_raw/companies.csv", + }, + } + globals_config = {"x": 34, "dataset_type": "pandas.CSVDataSet"} + _write_yaml(base_params, param_config) + _write_yaml(globals_params, globals_config) + _write_yaml(base_catalog, catalog_config) + conf = OmegaConfigLoader(tmp_path, default_run_env="") + assert OmegaConf.has_resolver("globals") + # Globals are resolved correctly in parameter + assert conf["parameters"]["my_param"] == globals_config["x"] + # The default value is used if the key does not exist + assert conf["parameters"]["my_param_default"] == 34 + # Globals are resolved correctly in catalog + assert conf["catalog"]["companies"]["type"] == globals_config["dataset_type"] + + def test_globals_nested(self, tmp_path): + base_params = tmp_path / _BASE_ENV / "parameters.yml" + globals_params = tmp_path / _BASE_ENV / "globals.yml" + param_config = { + "my_param": "${globals:x}", + "my_nested_param": "${globals:nested.y}", + } + globals_config = { + "x": 34, + "nested": { + "y": 42, + }, + } + _write_yaml(base_params, param_config) + _write_yaml(globals_params, globals_config) + conf = OmegaConfigLoader(tmp_path, default_run_env="") + assert conf["parameters"]["my_param"] == globals_config["x"] + # Nested globals are accessible with dot notation + assert conf["parameters"]["my_nested_param"] == globals_config["nested"]["y"] + + def test_globals_across_env(self, tmp_path): + base_params = tmp_path / _BASE_ENV / "parameters.yml" + local_params = tmp_path / _DEFAULT_RUN_ENV / "parameters.yml" + 
base_globals = tmp_path / _BASE_ENV / "globals.yml" + local_globals = tmp_path / _DEFAULT_RUN_ENV / "globals.yml" + base_param_config = { + "param1": "${globals:y}", + } + local_param_config = { + "param2": "${globals:x}", + } + base_globals_config = { + "x": 34, + "y": 25, + } + local_globals_config = { + "y": 99, + } + _write_yaml(base_params, base_param_config) + _write_yaml(local_params, local_param_config) + _write_yaml(base_globals, base_globals_config) + _write_yaml(local_globals, local_globals_config) + conf = OmegaConfigLoader(tmp_path) + # Local global overwrites the base global value + assert conf["parameters"]["param1"] == local_globals_config["y"] + # Base global value is accessible to local params + assert conf["parameters"]["param2"] == base_globals_config["x"] + + def test_bad_globals(self, tmp_path): + base_params = tmp_path / _BASE_ENV / "parameters.yml" + base_globals = tmp_path / _BASE_ENV / "globals.yml" + base_param_config = { + "param1": "${globals:x.y}", + } + base_globals_config = { + "x": { + "z": 23, + }, + } + _write_yaml(base_params, base_param_config) + _write_yaml(base_globals, base_globals_config) + conf = OmegaConfigLoader(tmp_path, default_run_env="") + with pytest.raises( + InterpolationResolutionError, + match=r"Globals key 'x.y' not found and no default value provided.", + ): + conf["parameters"]["param1"] + + def test_bad_globals_underscore(self, tmp_path): + base_params = tmp_path / _BASE_ENV / "parameters.yml" + base_globals = tmp_path / _BASE_ENV / "globals.yml" + base_param_config = { + "param2": "${globals:_ignore}", + } + base_globals_config = { + "_ignore": 45, + } + _write_yaml(base_params, base_param_config) + _write_yaml(base_globals, base_globals_config) + conf = OmegaConfigLoader(tmp_path, default_run_env="") + with pytest.raises( + InterpolationResolutionError, + match=r"Keys starting with '_' are not supported for globals.", + ): + conf["parameters"]["param2"] From 4563a4c609cf808e9087561af5619f143ba3fcff Mon Sep 
17 00:00:00 2001 From: Deepyaman Datta Date: Tue, 22 Aug 2023 03:06:45 -0600 Subject: [PATCH 2/2] Consolidate two `ruff-pre-commit` entries into one (#2881) * Clean up Ruff configuration in base pyproject.toml Signed-off-by: Deepyaman Datta * Consolidate two `ruff-pre-commit` entries into one Signed-off-by: Deepyaman Datta * Move Ruff hook to the top, because it's aggressive Signed-off-by: Deepyaman Datta * Add `kedro` as `known-first-party` for `isort` run Signed-off-by: Deepyaman Datta * Add noqa in docs/source/conf.py Signed-off-by: Deepyaman Datta * Fix ruff linter precommit config Signed-off-by: Nok --------- Signed-off-by: Deepyaman Datta Signed-off-by: Nok Co-authored-by: Nok --- .pre-commit-config.yaml | 29 +++++++++-------------------- docs/source/conf.py | 2 +- kedro/framework/cli/micropkg.py | 2 +- pyproject.toml | 20 ++++++++++++-------- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 57bafd2416..ba9bf87df2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,15 @@ default_stages: [commit, manual] repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.0.277 + hooks: + - id: ruff + name: "ruff on kedro/, tests/ and docs/" + args: ["--fix", "--exit-non-zero-on-fix"] + exclude: "^kedro/templates/|^features/steps/test_starter/" + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.2.3 hooks: @@ -48,26 +57,6 @@ repos: additional_dependencies: [black~=22.0] entry: blacken-docs --skip-errors - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.0.277 - hooks: - - id: ruff - name: "ruff on kedro/" - args: ["--fix", "--show-fixes", "--exit-non-zero-on-fix"] - exclude: "^kedro/templates/|^features/steps/test_starter/|tests|docs" - - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. 
- rev: v0.0.277 - hooks: - - id: ruff - name: "ruff on tests/ and docs/" - # PLR2004: Magic value used - # PLR0913: Too many arguments - args: ["--fix", "--show-fixes", "--exit-non-zero-on-fix", "--ignore=PLR2004,PLR0913"] - # include: "tests" - exclude: "^kedro/templates/|^features/steps/test_starter/|kedro" - - repo: local hooks: - id: black diff --git a/docs/source/conf.py b/docs/source/conf.py index 804bbbbfa9..4519143968 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -464,7 +464,7 @@ def autolink_classes_and_methods(lines): lines[i] = re.sub(existing, rf"{replacement}", lines[i]) -def autodoc_process_docstring(app, what, name, obj, options, lines): +def autodoc_process_docstring(app, what, name, obj, options, lines): # noqa: PLR0913 try: # guarded method to make sure build never fails log_suggestions(lines, name) diff --git a/kedro/framework/cli/micropkg.py b/kedro/framework/cli/micropkg.py index 36d103a332..f42ea0edbf 100644 --- a/kedro/framework/cli/micropkg.py +++ b/kedro/framework/cli/micropkg.py @@ -1,5 +1,5 @@ """A collection of CLI commands for working with Kedro micro-packages.""" -# ruff: noqa: I001 # https://github.com/kedro-org/kedro/pull/2634 +# ruff: noqa: I001 from __future__ import annotations import logging diff --git a/pyproject.toml b/pyproject.toml index df3bf4a2fe..0421e10093 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -206,14 +206,18 @@ ignore_imports = [ [tool.ruff] line-length = 88 show-fixes = true -# select = ["A", "B", "C", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"] select = [ - "F", # Pyflakes - "E", # Pycodestyle - "W", # Pycodestyle + "F", # Pyflakes + "W", # pycodestyle + "E", # pycodestyle + "I", # isort "UP", # pyupgrade - "I", # isort - "PL", # Pylint + "PL", # Pylint ] -ignore = ["E501"] # Black take 
care off line-too-long -unfixable = [] +ignore = ["E501"] # Black takes care of line-too-long + +[tool.ruff.isort] +known-first-party = ["kedro"] + +[tool.ruff.per-file-ignores] +"{tests,docs}/*" = ["PLR2004","PLR0913"]