From ae3add5ce959671249b331b8f407dae3d0d2baa4 Mon Sep 17 00:00:00 2001
From: Matthew Broadway
Date: Sun, 17 Nov 2024 15:15:03 +0000
Subject: [PATCH] add benchmarking script

---
 pyproject.toml                 |   1 +
 tests/README.md                |  13 ++++
 tests/__init__.py              |   0
 tests/create_benchmark_data.py | 131 +++++++++++++++++++++++++++++++++
 tests/runner.py                |  20 ++++-
 5 files changed, 162 insertions(+), 3 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/create_benchmark_data.py

diff --git a/pyproject.toml b/pyproject.toml
index 6ac45bc..90d3dac 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,6 +61,7 @@ ignore = [
     "RET505", # superfluous-else-return
     "S101", # assert
     "S301", # suspicious-pickle-usage
+    "S311", # suspicious-non-cryptographic-random-usage
     "S324", # hashlib-insecure-hash-function
     "S603", # subprocess-without-shell-equals-true
     "S607", # start-process-with-partial-path
diff --git a/tests/README.md b/tests/README.md
index 5da43fb..50ff759 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -66,6 +66,19 @@ Connect to the debugger, eg [using vscode](https://code.visualstudio.com/docs/py
 
 Note: set `CLEAR_WORKSPACE = False` in `common.py` if you want to prevent the temporary files generated during the test from being cleared.
 
+### Benchmarking
+
+The `create_benchmark_data.py` script creates a directory with many python packages to represent a worst-case scenario.
+Run the script, then run `venv/bin/python run.py` from the created directory.
+
+One way of obtaining profiling information is to run:
+
+```sh
+venv/bin/python -m cProfile -o profile.prof run.py
+pyprof2calltree -i profile.prof -o profile.log
+kcachegrind profile.log
+```
+
 ### Caching
 
 sccache is a tool for caching build artifacts to speed up compilation. Unfortunately, it is currently useless for these
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/create_benchmark_data.py b/tests/create_benchmark_data.py
new file mode 100644
index 0000000..32c7122
--- /dev/null
+++ b/tests/create_benchmark_data.py
@@ -0,0 +1,131 @@
+import argparse
+import logging
+import random
+import string
+import sys
+import textwrap
+from dataclasses import dataclass
+from pathlib import Path
+
+from runner import VirtualEnv
+
+script_dir = Path(__file__).resolve().parent
+repo_root = script_dir.parent
+
+log = logging.getLogger("runner")
+logging.basicConfig(format="[%(name)s] [%(levelname)s] %(message)s", level=logging.DEBUG)
+
+
+@dataclass
+class BenchmarkConfig:
+    seed: int
+    filename_length: int
+    depth: int
+    num_python_editable_packages: int
+
+    @staticmethod
+    def default() -> "BenchmarkConfig":
+        return BenchmarkConfig(
+            seed=0,
+            filename_length=10,
+            depth=10,
+            num_python_editable_packages=100,
+        )
+
+
+def random_name(rng: random.Random, length: int) -> str:
+    return "".join(rng.choices(string.ascii_lowercase, k=length))
+
+
+def random_path(rng: random.Random, root: Path, depth: int, name_length: int) -> Path:
+    path = root
+    for _ in range(depth):
+        path = path / random_name(rng, name_length)
+    return path
+
+
+def create_python_package(root: Path) -> tuple[str, Path]:
+    root.mkdir(parents=True, exist_ok=False)
+    src_dir = root / "src" / root.name
+    src_dir.mkdir(parents=True)
+    (src_dir / "__init__.py").write_text(
+        textwrap.dedent(f"""\
+            def get_name():
+                return "{root.name}"
+        """)
+    )
+    (root / "pyproject.toml").write_text(
+        textwrap.dedent(f"""\
+            [project]
+            name = "{root.name}"
+            version = "0.1.0"
+
+            [tool.setuptools.packages.find]
+            where = ["src"]
["src"] + + [build-system] + requires = ["setuptools", "wheel"] + build-backend = "setuptools.build_meta" + """) + ) + return root.name, src_dir + + +def create_benchmark_environment(root: Path, config: BenchmarkConfig) -> None: + rng = random.Random(config.seed) + + log.info("creating benchmark environment at %s", root) + root.mkdir(parents=True, exist_ok=False) + venv = VirtualEnv.create(root / "venv", Path(sys.executable)) + + venv.install_editable_package(repo_root) + + python_package_names = [] + python_package_paths = [] + + packages_root = random_path(rng, root, config.depth, config.filename_length) + name, src_dir = create_python_package(packages_root) + python_package_names.append(name) + python_package_paths.append(src_dir) + + for _ in range(config.num_python_editable_packages): + path = random_path(rng, packages_root, config.depth, config.filename_length) + name, src_dir = create_python_package(path) + python_package_names.append(name) + python_package_paths.append(src_dir) + + python_package_paths_str = ", ".join(f'"{path.parent}"' for path in python_package_paths) + import_python_packages = "\n".join(f"import {name}" for name in python_package_names) + (root / "run.py").write_text(f"""\ +import time +import logging +import sys +import maturin_import_hook + +sys.path.extend([{python_package_paths_str}]) + +# logging.basicConfig(format='%(asctime)s %(name)s [%(levelname)s] %(message)s', level=logging.DEBUG) +# maturin_import_hook.reset_logger() + +maturin_import_hook.install() + +start = time.perf_counter() + +{import_python_packages} + +end = time.perf_counter() +print(f'took {{end - start:.6f}}s') +""") + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("root", type=Path, help="the location to write the benchmark data to") + args = parser.parse_args() + + config = BenchmarkConfig.default() + create_benchmark_environment(args.root, config) + + +if __name__ == "__main__": + main() diff --git a/tests/runner.py b/tests/runner.py index db58b66..1050b9a 100644 --- a/tests/runner.py +++ b/tests/runner.py @@ -122,7 +122,7 @@ def _pip_install_command(interpreter_path: Path) -> list[str]: def _create_test_venv(python: Path, venv_dir: Path) -> VirtualEnv: - venv = VirtualEnv.new(venv_dir, python) + venv = VirtualEnv.create(venv_dir, python) log.info("installing test requirements into virtualenv") proc = subprocess.run( [ @@ -156,13 +156,22 @@ def _create_virtual_env_command(interpreter_path: Path, venv_path: Path) -> list return [str(interpreter_path), "-m", "venv", str(venv_path)] +def _install_into_virtual_env_command(interpreter_path: Path, package_path: Path) -> list[str]: + if shutil.which("uv") is not None: + log.info("using uv to install package as editable") + return ["uv", "pip", "install", "--python", str(interpreter_path), "--editable", str(package_path)] + else: + log.info("using pip to install package as editable") + return [str(interpreter_path), "-m", "pip", "install", "--editable", str(package_path)] + + class VirtualEnv: def __init__(self, root: Path) -> None: self._root = root.resolve() self._is_windows = platform.system() == "Windows" @staticmethod - def new(root: Path, interpreter_path: Path) -> VirtualEnv: + def create(root: Path, interpreter_path: Path) -> VirtualEnv: if root.exists(): log.info("removing virtualenv at %s", root) shutil.rmtree(root) @@ -194,6 +203,11 @@ def interpreter_path(self) -> Path: assert interpreter.exists() return interpreter + def install_editable_package(self, package_path: Path) -> None: + cmd = 
+        proc = subprocess.run(cmd, capture_output=True, check=True)
+        log.debug("%s", proc.stdout.decode())
+
     def activate(self, env: dict[str, str]) -> None:
         """set the environment as-if venv/bin/activate was run"""
         path = env.get("PATH", "").split(os.pathsep)
@@ -254,7 +268,7 @@ def main() -> None:
     parser.add_argument(
         "--name",
         default="Tests",
-        help="the name for the suite of tests this run (use to distinguish between OS/python version)",
+        help="the name to assign to the suite of tests for this run (use to distinguish between OS/python versions)",
     )
     parser.add_argument(
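
Taken together, the README addition and the new `create_benchmark_data.py` script suggest an end-to-end invocation along the lines of the sketch below. This is illustrative rather than part of the patch: the output directory `/tmp/benchmark_data` is an arbitrary example, and the script is assumed to be run from the `tests/` directory so that its `from runner import VirtualEnv` import resolves.

```sh
# run from the tests/ directory so that runner.py is importable
python create_benchmark_data.py /tmp/benchmark_data  # output path is arbitrary

# then run the generated benchmark from the created directory, as the README describes
cd /tmp/benchmark_data
venv/bin/python run.py
```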