diff --git a/RELEASE.md b/RELEASE.md index 8584725fb4..bab921cd3b 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,7 @@ # Upcoming Release 0.19.7 ## Major features and improvements +* Kedro commands are now lazily loaded, which improves performance when running Kedro commands. ## Bug fixes and other changes * Updated error message for invalid catalog entries. diff --git a/docs/source/kedro_project_setup/session.md b/docs/source/kedro_project_setup/session.md index e1a95bf668..a862a36447 100644 --- a/docs/source/kedro_project_setup/session.md +++ b/docs/source/kedro_project_setup/session.md @@ -67,4 +67,8 @@ This function reads `settings.py` and `pipeline_registry.py` and registers the c #### ValueError: Package name not found > ValueError: Package name not found. Make sure you have configured the project using 'bootstrap_project'. This should happen automatically if you are using Kedro command line interface. -If you are using `multiprocessing`, you need to be careful about this. Depending on your Operating System, you may have [different default](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods). If the processes are `spawn`, Python will re-import all the modules in each process and thus you need to run `configure_project` again at the start of the new process. For example, this is how Kedro handle this in `ParallelRunner`(https://github.com/kedro-org/kedro/blob/9e883e6a0ba40e3db4497b234dcb3801258e8396/kedro/runner/parallel_runner.py#L84-L85) +If you are using `multiprocessing`, you need to be careful about this. Depending on your operating system, you may have a [different default start method](https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods). If the start method is `spawn`, Python will re-import all the modules in each process and thus you need to run `configure_project` again at the start of the new process. 
For example, this is how Kedro handles this in `ParallelRunner`: +```python +if multiprocessing.get_start_method() == "spawn" and package_name: + _bootstrap_subprocess(package_name, logging_config) +``` diff --git a/kedro/framework/cli/cli.py b/kedro/framework/cli/cli.py index addbbfbf54..87bd5c4505 100644 --- a/kedro/framework/cli/cli.py +++ b/kedro/framework/cli/cli.py @@ -15,19 +15,13 @@ from kedro import __version__ as version from kedro.framework.cli import BRIGHT_BLACK, ORANGE -from kedro.framework.cli.catalog import catalog_cli from kedro.framework.cli.hooks import get_cli_hook_manager -from kedro.framework.cli.jupyter import jupyter_cli -from kedro.framework.cli.micropkg import micropkg_cli -from kedro.framework.cli.pipeline import pipeline_cli -from kedro.framework.cli.project import project_group -from kedro.framework.cli.registry import registry_cli -from kedro.framework.cli.starters import create_cli from kedro.framework.cli.utils import ( CONTEXT_SETTINGS, ENTRY_POINT_GROUPS, CommandCollection, KedroCliError, + LazyGroup, _get_entry_points, load_entry_points, ) @@ -51,6 +45,9 @@ def cli() -> None: # pragma: no cover """Kedro is a CLI for creating and using Kedro projects. For more information, type ``kedro info``. + NOTE: If a command from a plugin conflicts with a built-in command from Kedro, + the command from the plugin will take precedence. 
+ """ pass @@ -85,6 +82,38 @@ def info() -> None: click.echo("No plugins installed") +@click.group( + context_settings=CONTEXT_SETTINGS, + cls=LazyGroup, + name="Kedro", + lazy_subcommands={ + "registry": "kedro.framework.cli.registry.registry", + "catalog": "kedro.framework.cli.catalog.catalog", + "ipython": "kedro.framework.cli.project.ipython", + "run": "kedro.framework.cli.project.run", + "micropkg": "kedro.framework.cli.micropkg.micropkg", + "package": "kedro.framework.cli.project.package", + "jupyter": "kedro.framework.cli.jupyter.jupyter", + "pipeline": "kedro.framework.cli.pipeline.pipeline", + }, +) +def project_commands() -> None: + pass # pragma: no cover + + +@click.group( + context_settings=CONTEXT_SETTINGS, + name="Kedro", + cls=LazyGroup, + lazy_subcommands={ + "new": "kedro.framework.cli.starters.new", + "starter": "kedro.framework.cli.starters.starter", + }, +) +def global_commands() -> None: + pass # pragma: no cover + + def _init_plugins() -> None: init_hooks = load_entry_points("init") for init_hook in init_hooks: @@ -125,7 +154,6 @@ def main( self._cli_hook_manager.hook.before_command_run( project_metadata=self._metadata, command_args=args ) - try: super().main( args=args, @@ -178,7 +206,7 @@ def global_groups(self) -> Sequence[click.MultiCommand]: combines them with the built-in ones (eventually overriding the built-in ones if they are redefined by plugins). 
""" - return [cli, create_cli, *load_entry_points("global")] + return [*load_entry_points("global"), cli, global_commands] @property def project_groups(self) -> Sequence[click.MultiCommand]: @@ -192,15 +220,6 @@ def project_groups(self) -> Sequence[click.MultiCommand]: if not self._metadata: return [] - built_in = [ - catalog_cli, - jupyter_cli, - pipeline_cli, - micropkg_cli, - project_group, - registry_cli, - ] - plugins = load_entry_points("project") try: @@ -209,7 +228,7 @@ def project_groups(self) -> Sequence[click.MultiCommand]: except ModuleNotFoundError: # return only built-in commands and commands from plugins # (plugins can override built-in commands) - return [*built_in, *plugins] + return [*plugins, project_commands] # fail badly if cli.py exists, but has no `cli` in it if not hasattr(project_cli, "cli"): @@ -219,7 +238,7 @@ def project_groups(self) -> Sequence[click.MultiCommand]: user_defined = project_cli.cli # return built-in commands, plugin commands and user defined commands # (overriding happens as follows built-in < plugins < cli.py) - return [*built_in, *plugins, user_defined] + return [user_defined, *plugins, project_commands] def main() -> None: # pragma: no cover diff --git a/kedro/framework/cli/utils.py b/kedro/framework/cli/utils.py index 2d298bd770..7258fb2680 100644 --- a/kedro/framework/cli/utils.py +++ b/kedro/framework/cli/utils.py @@ -120,14 +120,12 @@ def __init__(self, *groups: tuple[str, Sequence[click.MultiCommand]]): for title, cli_list in groups ] sources = list(chain.from_iterable(cli_list for _, cli_list in self.groups)) - help_texts = [ cli.help for cli_collection in sources for cli in cli_collection.sources if cli.help ] - self._dedupe_commands(sources) super().__init__( sources=sources, # type: ignore[arg-type] help="\n\n".join(help_texts), @@ -136,29 +134,6 @@ def __init__(self, *groups: tuple[str, Sequence[click.MultiCommand]]): self.params = sources[0].params self.callback = sources[0].callback - @staticmethod - def 
_dedupe_commands(cli_collections: Sequence[click.CommandCollection]) -> None: - """Deduplicate commands by keeping the ones from the last source - in the list. - """ - seen_names: set[str] = set() - for cli_collection in reversed(cli_collections): - for cmd_group in reversed(cli_collection.sources): - cmd_group.commands = { # type: ignore[attr-defined] - cmd_name: cmd - for cmd_name, cmd in cmd_group.commands.items() # type: ignore[attr-defined] - if cmd_name not in seen_names - } - seen_names |= cmd_group.commands.keys() # type: ignore[attr-defined] - - # remove empty command groups - for cli_collection in cli_collections: - cli_collection.sources = [ - cmd_group - for cmd_group in cli_collection.sources - if cmd_group.commands # type: ignore[attr-defined] - ] - @staticmethod def _merge_same_name_collections( groups: Sequence[click.MultiCommand], @@ -169,7 +144,6 @@ def _merge_same_name_collections( named_groups[group.name].append(group) # type: ignore[index] if group.help: helps[group.name].append(group.help) # type: ignore[index] - return [ click.CommandCollection( name=group_name, @@ -504,3 +478,42 @@ def _split_load_versions(ctx: click.Context, param: Any, value: str) -> dict[str load_versions_dict[load_version_list[0]] = load_version_list[1] return load_versions_dict + + +class LazyGroup(click.Group): + """A click Group that supports lazy loading of subcommands.""" + + def __init__( + self, + *args: Any, + lazy_subcommands: dict[str, str] | None = None, + **kwargs: Any, + ): + super().__init__(*args, **kwargs) + # lazy_subcommands is a map of the form: + # + # {command-name} -> {module-name}.{command-object-name} + # + self.lazy_subcommands = lazy_subcommands or {} + + def list_commands(self, ctx: click.Context) -> list[str]: + base = list(super().list_commands(ctx)) + lazy = sorted(self.lazy_subcommands.keys()) + return base + lazy + + def get_command( # type: ignore[override] + self, ctx: click.Context, cmd_name: str + ) -> click.BaseCommand | click.Command 
| None: + if cmd_name in self.lazy_subcommands: + return self._lazy_load(cmd_name) + return super().get_command(ctx, cmd_name) + + def _lazy_load(self, cmd_name: str) -> click.BaseCommand: + # lazily loading a command, first get the module name and attribute name + import_path = self.lazy_subcommands[cmd_name] + modname, cmd_object_name = import_path.rsplit(".", 1) + # do the import + mod = import_module(modname) + # get the Command object from that module + cmd_object = getattr(mod, cmd_object_name) + return cmd_object # type: ignore[no-any-return] diff --git a/tests/framework/cli/test_cli.py b/tests/framework/cli/test_cli.py index e1a0e4c9dd..317a596924 100644 --- a/tests/framework/cli/test_cli.py +++ b/tests/framework/cli/test_cli.py @@ -11,14 +11,13 @@ from kedro import KedroDeprecationWarning from kedro import __version__ as version from kedro.framework.cli import load_entry_points -from kedro.framework.cli.catalog import catalog_cli -from kedro.framework.cli.cli import KedroCLI, _init_plugins, cli -from kedro.framework.cli.jupyter import jupyter_cli -from kedro.framework.cli.micropkg import micropkg_cli -from kedro.framework.cli.pipeline import pipeline_cli -from kedro.framework.cli.project import project_group -from kedro.framework.cli.registry import registry_cli -from kedro.framework.cli.starters import create_cli +from kedro.framework.cli.cli import ( + KedroCLI, + _init_plugins, + cli, + global_commands, + project_commands, +) from kedro.framework.cli.utils import ( CommandCollection, KedroCliError, @@ -335,15 +334,19 @@ def test_project_commands_no_clipy(self, mocker, fake_metadata): side_effect=cycle([ModuleNotFoundError()]), ) kedro_cli = KedroCLI(fake_metadata.project_path) - print(kedro_cli.project_groups) - assert len(kedro_cli.project_groups) == 6 - assert kedro_cli.project_groups == [ - catalog_cli, - jupyter_cli, - pipeline_cli, - micropkg_cli, - project_group, - registry_cli, + # There is only one `LazyGroup` for project commands + assert 
len(kedro_cli.project_groups) == 1 + assert kedro_cli.project_groups == [project_commands] + # Assert that the lazy commands are listed properly + assert kedro_cli.project_groups[0].list_commands(None) == [ + "catalog", + "ipython", + "jupyter", + "micropkg", + "package", + "pipeline", + "registry", + "run", ] def test_project_commands_no_project(self, mocker, tmp_path): @@ -374,22 +377,20 @@ def test_project_commands_valid_clipy(self, mocker, fake_metadata): return_value=Module(cli=cli), ) kedro_cli = KedroCLI(fake_metadata.project_path) - assert len(kedro_cli.project_groups) == 7 + # The project group will now have two groups, the first from the project's cli.py and + # the second is the lazy project command group + assert len(kedro_cli.project_groups) == 2 assert kedro_cli.project_groups == [ - catalog_cli, - jupyter_cli, - pipeline_cli, - micropkg_cli, - project_group, - registry_cli, cli, + project_commands, ] def test_kedro_cli_no_project(self, mocker, tmp_path): mocker.patch("kedro.framework.cli.cli._is_project", return_value=False) kedro_cli = KedroCLI(tmp_path) assert len(kedro_cli.global_groups) == 2 - assert kedro_cli.global_groups == [cli, create_cli] + # The global groups will be the cli(group for info command) and the global commands (starter and new) + assert kedro_cli.global_groups == [cli, global_commands] result = CliRunner().invoke(kedro_cli, []) @@ -413,28 +414,17 @@ def test_kedro_run_no_project(self, mocker, tmp_path): ) def test_kedro_cli_with_project(self, mocker, fake_metadata): - Module = namedtuple("Module", ["cli"]) mocker.patch("kedro.framework.cli.cli._is_project", return_value=True) mocker.patch( "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata ) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) kedro_cli = KedroCLI(fake_metadata.project_path) assert len(kedro_cli.global_groups) == 2 - assert kedro_cli.global_groups == [cli, create_cli] - assert 
len(kedro_cli.project_groups) == 7 + assert kedro_cli.global_groups == [cli, global_commands] + assert len(kedro_cli.project_groups) == 1 assert kedro_cli.project_groups == [ - catalog_cli, - jupyter_cli, - pipeline_cli, - micropkg_cli, - project_group, - registry_cli, - cli, + project_commands, ] result = CliRunner().invoke(kedro_cli, []) diff --git a/tests/framework/cli/test_cli_hooks.py b/tests/framework/cli/test_cli_hooks.py index 57f8039e9f..33f13e50fb 100644 --- a/tests/framework/cli/test_cli_hooks.py +++ b/tests/framework/cli/test_cli_hooks.py @@ -6,7 +6,7 @@ import pytest from click.testing import CliRunner -from kedro.framework.cli.cli import KedroCLI, cli +from kedro.framework.cli.cli import KedroCLI from kedro.framework.cli.hooks import cli_hook_impl, get_cli_hook_manager, manager from kedro.framework.startup import ProjectMetadata @@ -83,7 +83,7 @@ def fake_plugin_distribution(mocker): class TestKedroCLIHooks: @pytest.mark.parametrize( "command, exit_code", - [("-V", 0), ("info", 2), ("pipeline list", 2), ("starter", 0)], + [("-V", 0), ("info", 0), ("pipeline list", 2), ("starter", 0)], ) def test_kedro_cli_should_invoke_cli_hooks_from_plugin( self, @@ -97,7 +97,6 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin( ): caplog.set_level(logging.DEBUG, logger="kedro") - Module = namedtuple("Module", ["cli"]) mocker.patch( "kedro.framework.cli.cli._is_project", return_value=True, @@ -106,10 +105,6 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin( "kedro.framework.cli.cli.bootstrap_project", return_value=fake_metadata, ) - mocker.patch( - "kedro.framework.cli.cli.importlib.import_module", - return_value=Module(cli=cli), - ) kedro_cli = KedroCLI(fake_metadata.project_path) result = CliRunner().invoke(kedro_cli, [command]) assert ( @@ -121,8 +116,7 @@ def test_kedro_cli_should_invoke_cli_hooks_from_plugin( f"Before command `{command}` run for project {fake_metadata}" in result.output ) - - # 'pipeline list' and 'info' aren't actually in the 
click structure and + # 'pipeline list' isn't actually in the click structure and # return exit code 2 ('invalid usage of some shell built-in command') assert ( f"After command `{command}` run for project {fake_metadata} (exit: {exit_code})"