From bda15bb2b34d2ddda9641660c851906bbe577aaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yolan=20Honor=C3=A9-Roug=C3=A9?= <29451317+Galileo-Galilei@users.noreply.github.com> Date: Wed, 17 Apr 2024 23:52:41 +0200 Subject: [PATCH] :sparkles: Make project CLI init, ui and modelify work inside subdirectory (#531) (#539) * :sparkles: Make project CLI init, ui and modelify work inside subdirectory (#531) * add tests * add changelog * relax requirements * fix typo in changelog --- CHANGELOG.md | 8 +++++ kedro_mlflow/framework/cli/cli.py | 11 ++++--- kedro_mlflow/utils.py | 22 ++++++++++++- tests/framework/cli/test_cli.py | 37 ++++++++++++++-------- tests/framework/cli/test_cli_modelify.py | 39 +++++++++++++++--------- 5 files changed, 83 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 185c3c45..85be902e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ - :construction_worker: :package: Add build distribution (instead of source only distribution) to PyPI release to make install easier and faster ([#515](https://github.com/Galileo-Galilei/kedro-mlflow/issues/515)) - :memo: Document the ability to update configuration at runtime ([#395](https://github.com/Galileo-Galilei/kedro-mlflow/issues/395)) +### Changed + +- :sparkles: Project level CLI commands ``kedro mlflow init``, ``kedro mlflow ui`` and ``kedro mlflow modelify`` now work even inside a subdirectory and not at the root of the kedro project to be [consistent with ``kedro>0.19.4``](https://github.com/kedro-org/kedro/blob/main/RELEASE.md#major-features-and-improvements-1) ([#531](https://github.com/Galileo-Galilei/kedro-mlflow/issues/531)) + +### Fixed + +- :bug: Proxy import of private kedro functions ``_is_project`` and ``_find_kedro_project`` to be resilient to changes ([#531](https://github.com/Galileo-Galilei/kedro-mlflow/issues/531)) + ## [0.12.1] - 2024-02-09 ### Added diff --git a/kedro_mlflow/framework/cli/cli.py b/kedro_mlflow/framework/cli/cli.py index 81622968..699b2001 100644 --- a/kedro_mlflow/framework/cli/cli.py +++ b/kedro_mlflow/framework/cli/cli.py @@ -11,12 +11,13 @@ from kedro import __version__ as kedro_version from kedro.framework.project import pipelines, settings from kedro.framework.session import KedroSession -from kedro.framework.startup import _is_project, bootstrap_project +from kedro.framework.startup import bootstrap_project from mlflow.models import infer_signature from packaging import version from kedro_mlflow.framework.cli.cli_utils import write_jinja_template from kedro_mlflow.mlflow import KedroPipelineModel +from kedro_mlflow.utils import _find_kedro_project, _is_project LOGGER = getLogger(__name__) TEMPLATE_FOLDER_PATH = Path(__file__).parent.parent.parent / "template" / "project" @@ -27,7 +28,7 @@ def reset_commands(self): self.commands = {} # add commands on the fly based on conditions - if _is_project(Path.cwd()): + if _is_project(_find_kedro_project(Path.cwd()) or Path.cwd()): self.add_command(init) self.add_command(ui) self.add_command(modelify) @@ -87,7 +88,7 @@ def init(env: str, force: bool, silent: bool): # get constants mlflow_yml = "mlflow.yml" - project_path = Path().cwd() + project_path = _find_kedro_project(Path.cwd()) or Path.cwd() project_metadata = bootstrap_project(project_path) mlflow_yml_path = project_path / settings.CONF_SOURCE / env / mlflow_yml @@ -150,7 +151,7 @@ def ui(env: str, port: str, host: str): enables to browse and compares runs. """ - project_path = Path().cwd() + project_path = _find_kedro_project(Path.cwd()) or Path.cwd() bootstrap_project(project_path) with KedroSession.create( project_path=project_path, @@ -307,7 +308,7 @@ def modelify( # if the command is available, we are necessarily at the root of a kedro project - project_path = Path.cwd() + project_path = _find_kedro_project(Path.cwd()) or Path.cwd() bootstrap_project(project_path) with KedroSession.create(project_path=project_path) as session: # "pipeline" is the Pipeline object you want to convert to a mlflow model diff --git a/kedro_mlflow/utils.py b/kedro_mlflow/utils.py index ae859a92..53c8f961 100644 --- a/kedro_mlflow/utils.py +++ b/kedro_mlflow/utils.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List, Union +from typing import Any, List, Union def _parse_requirements(path: Union[str, Path], encoding="utf-8") -> List: @@ -8,3 +8,23 @@ def _parse_requirements(path: Union[str, Path], encoding="utf-8") -> List: x.strip() for x in file_handler if x.strip() and not x.startswith("-r") ] return requirements + + +def _is_project(project_path: Union[str, Path]) -> bool: + try: + # untested in the CI, for retrocompatiblity with kedro >=0.19.0,<0.19.3 + from kedro.framework.startup import _is_project as _ip + except ImportError: + from kedro.utils import _is_project as _ip + + return _ip(project_path) + + +def _find_kedro_project(current_dir: Path) -> Any: + try: + # untested in the CI, for retrocompatiblity with kedro >=0.19.0,<0.19.3 + from kedro.framework.startup import _find_kedro_project as _fkp + except ImportError: + from kedro.utils import _find_kedro_project as _fkp + + return _fkp(current_dir) diff --git a/tests/framework/cli/test_cli.py b/tests/framework/cli/test_cli.py index 0c10d1b9..278cc7a5 100644 --- a/tests/framework/cli/test_cli.py +++ b/tests/framework/cli/test_cli.py @@ -64,16 +64,15 @@ def test_cli_global_discovered(monkeypatch, tmp_path): # because discovery mechanisme is linked to setup.py -## This command is temporarlily deactivated beacuse of a bug in kedro==0.17.3, see: https://github.com/Galileo-Galilei/kedro-mlflow/issues/193 -# def test_mlflow_commands_outside_kedro_project(monkeypatch, tmp_path): -# monkeypatch.chdir(tmp_path) -# cli_runner = CliRunner() -# result = cli_runner.invoke(cli_mlflow) -# assert {"new"} == set(extract_cmd_from_help(result.output)) - +@pytest.mark.parametrize("inside_subdirectory", (True, False)) +def test_mlflow_commands_inside_kedro_project( + monkeypatch, kedro_project, inside_subdirectory +): + if inside_subdirectory is True: + monkeypatch.chdir(kedro_project / "src") + else: + monkeypatch.chdir(kedro_project) -def test_mlflow_commands_inside_kedro_project(monkeypatch, kedro_project): - monkeypatch.chdir(kedro_project) # launch the command to initialize the project cli_runner = CliRunner() result = cli_runner.invoke(cli_mlflow) @@ -81,9 +80,13 @@ def test_mlflow_commands_inside_kedro_project(monkeypatch, kedro_project): assert "You have not updated your template yet" not in result.output -def test_cli_init(monkeypatch, kedro_project): +@pytest.mark.parametrize("inside_subdirectory", (True, False)) +def test_cli_init(monkeypatch, kedro_project, inside_subdirectory): # "kedro_project" is a pytest.fixture declared in conftest - monkeypatch.chdir(kedro_project) + if inside_subdirectory is True: + monkeypatch.chdir(kedro_project / "src") + else: + monkeypatch.chdir(kedro_project) cli_runner = CliRunner() result = cli_runner.invoke(cli_init) @@ -177,6 +180,7 @@ def test_cli_init_with_env(monkeypatch, kedro_project, env): ) def test_cli_init_with_wrong_env(monkeypatch, kedro_project, env): # "kedro_project" is a pytest.fixture declared in conftest + monkeypatch.chdir(kedro_project) cli_runner = CliRunner() result = cli_runner.invoke(cli_init, f"--env {env}") @@ -189,8 +193,15 @@ def test_cli_init_with_wrong_env(monkeypatch, kedro_project, env): # I tried mimicking mlflow_cli with mock but did not achieve desired result # other solution is to use pytest-xprocess # TODO: create an initlaized_kedro_project fixture with a global scope -def test_ui_is_up(monkeypatch, mocker, kedro_project_with_mlflow_conf): - monkeypatch.chdir(kedro_project_with_mlflow_conf) +@pytest.mark.parametrize("inside_subdirectory", (True, False)) +def test_cli_ui_is_up( + monkeypatch, mocker, kedro_project_with_mlflow_conf, inside_subdirectory +): + if inside_subdirectory is True: + monkeypatch.chdir(kedro_project_with_mlflow_conf / "src") + else: + monkeypatch.chdir(kedro_project_with_mlflow_conf) + cli_runner = CliRunner() # This does not test anything : the goal is to check whether it raises an error diff --git a/tests/framework/cli/test_cli_modelify.py b/tests/framework/cli/test_cli_modelify.py index 434fd5c9..d69e361c 100644 --- a/tests/framework/cli/test_cli_modelify.py +++ b/tests/framework/cli/test_cli_modelify.py @@ -131,9 +131,6 @@ def kp_for_modelify_persistent_input(kp_for_modelify): @pytest.fixture def kp_for_modelify_with_parameters(tmp_path): - # TODO: find a better way to inject dynamically - # the templated config loader without modifying the template - _FAKE_MODELIFY_PROJECT_NAME = r"kp_for_modelify_with_params" config = { # "output_dir": tmp_path, @@ -203,43 +200,55 @@ def register_pipelines(): mlflow_tracking_uri: mlruns """ - kp_for_modelify = tmp_path / config["repo_name"] + kp_for_modelify_with_parameters = tmp_path / config["repo_name"] _write_file( - kp_for_modelify / "src" / config["python_package"] / "pipeline_registry.py", + kp_for_modelify_with_parameters + / "src" + / config["python_package"] + / "pipeline_registry.py", pipeline_registry_py, ) _write_file( - kp_for_modelify / "conf" / "base" / "catalog.yml", + kp_for_modelify_with_parameters / "conf" / "base" / "catalog.yml", catalog_yml, ) _write_file( - kp_for_modelify / "conf" / "base" / "parameters.yml", + kp_for_modelify_with_parameters / "conf" / "base" / "parameters.yml", parameters_yml, ) _write_file( - kp_for_modelify / "conf" / "base" / "mlflow.yml", + kp_for_modelify_with_parameters / "conf" / "base" / "mlflow.yml", mlflow_yml, ) - return kp_for_modelify + return kp_for_modelify_with_parameters @pytest.mark.parametrize( - "example_repo,artifacts_list", + "example_repo,artifacts_list,inside_subdirectory", [ - (pytest.lazy_fixture("kp_for_modelify"), ["trained_model"]), + (pytest.lazy_fixture("kp_for_modelify"), ["trained_model"], False), + (pytest.lazy_fixture("kp_for_modelify"), ["trained_model"], True), ( pytest.lazy_fixture("kp_for_modelify_with_parameters"), ["trained_model", "params:my_param"], + False, ), ], ) -def test_modelify_logs_in_mlflow(monkeypatch, example_repo, artifacts_list): - monkeypatch.chdir(example_repo) +def test_modelify_logs_in_mlflow_even_inside_subdirectory( + monkeypatch, example_repo, artifacts_list, inside_subdirectory +): + if inside_subdirectory is True: + project_cwd = example_repo / "src" + else: + project_cwd = example_repo - bootstrap_project(Path().cwd()) - with KedroSession.create(project_path=Path().cwd()) as session: + monkeypatch.chdir(project_cwd) + + bootstrap_project(example_repo) + with KedroSession.create(project_path=example_repo) as session: context = session.load_context() catalog = context.catalog catalog.save("trained_model", 2)