add benchmarking script
mbway committed Nov 17, 2024
1 parent 8404f6a commit ae3add5
Showing 5 changed files with 162 additions and 3 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -61,6 +61,7 @@ ignore = [
"RET505", # superfluous-else-return
"S101", # assert
"S301", # suspicious-pickle-usage
"S311", # suspicious-non-cryptographic-random-usage
"S324", # hashlib-insecure-hash-function
"S603", # subprocess-without-shell-equals-true
"S607", # start-process-with-partial-path
…
13 changes: 13 additions & 0 deletions tests/README.md
@@ -66,6 +66,19 @@ Connect to the debugger, eg [using vscode](https://code.visualstudio.com/docs/py
Note: set `CLEAR_WORKSPACE = False` in `common.py` if you want to prevent the temporary files generated during the test
from being cleared.

### Benchmarking

The `create_benchmark_data.py` script creates a directory containing many Python packages to represent a worst-case scenario.
Run the script, then run `venv/bin/python run.py` from the created directory: the generated `run.py` installs the import hook, imports every generated package, and prints the total time taken.
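
For example (using `/tmp/benchmark` as a hypothetical target directory):

```sh
python tests/create_benchmark_data.py /tmp/benchmark
cd /tmp/benchmark
venv/bin/python run.py
```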

One way of obtaining profiling information is to run:

```sh
venv/bin/python -m cProfile -o profile.prof run.py
pyprof2calltree -i profile.prof -o profile.log
kcachegrind profile.log
```
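
Note: `pyprof2calltree` can be installed with `pip install pyprof2calltree`, and `kcachegrind` is typically available from the system package manager.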

### Caching

sccache is a tool for caching build artifacts to speed up compilation. Unfortunately, it is currently useless for these
…
Empty file added tests/__init__.py
Empty file.
131 changes: 131 additions & 0 deletions tests/create_benchmark_data.py
@@ -0,0 +1,131 @@
import argparse
import logging
import random
import string
import sys
import textwrap
from dataclasses import dataclass
from pathlib import Path

from runner import VirtualEnv

script_dir = Path(__file__).resolve().parent
repo_root = script_dir.parent

log = logging.getLogger("runner")
logging.basicConfig(format="[%(name)s] [%(levelname)s] %(message)s", level=logging.DEBUG)


@dataclass
class BenchmarkConfig:
    seed: int
    filename_length: int
    depth: int
    num_python_editable_packages: int

    @staticmethod
    def default() -> "BenchmarkConfig":
        return BenchmarkConfig(
            seed=0,
            filename_length=10,
            depth=10,
            num_python_editable_packages=100,
        )


def random_name(rng: random.Random, length: int) -> str:
    return "".join(rng.choices(string.ascii_lowercase, k=length))


def random_path(rng: random.Random, root: Path, depth: int, name_length: int) -> Path:
    """construct a path `depth` directories below `root` using random names of the given length"""
    path = root
    for _ in range(depth):
        path = path / random_name(rng, name_length)
    return path


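# each generated package has this layout (shown here with a hypothetical random name "aaaaaaaaaa"):
#   aaaaaaaaaa/
#       pyproject.toml
#       src/
#           aaaaaaaaaa/
#               __init__.py  (defines get_name())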
def create_python_package(root: Path) -> tuple[str, Path]:
    root.mkdir(parents=True, exist_ok=False)
    src_dir = root / "src" / root.name
    src_dir.mkdir(parents=True)
    (src_dir / "__init__.py").write_text(
        textwrap.dedent(f"""\
            def get_name():
                return "{root.name}"
            """)
    )
    (root / "pyproject.toml").write_text(
        textwrap.dedent(f"""\
            [project]
            name = "{root.name}"
            version = "0.1.0"
            [tool.setuptools.packages.find]
            where = ["src"]
            [build-system]
            requires = ["setuptools", "wheel"]
            build-backend = "setuptools.build_meta"
            """)
    )
    return root.name, src_dir


def create_benchmark_environment(root: Path, config: BenchmarkConfig) -> None:
    rng = random.Random(config.seed)

    log.info("creating benchmark environment at %s", root)
    root.mkdir(parents=True, exist_ok=False)
    venv = VirtualEnv.create(root / "venv", Path(sys.executable))

    venv.install_editable_package(repo_root)

    python_package_names = []
    python_package_paths = []

    # create one package at a deeply nested path, then nest the remaining packages below it
    packages_root = random_path(rng, root, config.depth, config.filename_length)
    name, src_dir = create_python_package(packages_root)
    python_package_names.append(name)
    python_package_paths.append(src_dir)

    for _ in range(config.num_python_editable_packages):
        path = random_path(rng, packages_root, config.depth, config.filename_length)
        name, src_dir = create_python_package(path)
        python_package_names.append(name)
        python_package_paths.append(src_dir)

    # generate a script that installs the import hook then imports each package, timing how long the imports take
    python_package_paths_str = ", ".join(f'"{path.parent}"' for path in python_package_paths)
    import_python_packages = "\n".join(f"import {name}" for name in python_package_names)
    (root / "run.py").write_text(f"""\
import time
import logging
import sys
import maturin_import_hook
sys.path.extend([{python_package_paths_str}])
# logging.basicConfig(format='%(asctime)s %(name)s [%(levelname)s] %(message)s', level=logging.DEBUG)
# maturin_import_hook.reset_logger()
maturin_import_hook.install()
start = time.perf_counter()
{import_python_packages}
end = time.perf_counter()
print(f'took {{end - start:.6f}}s')
""")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("root", type=Path, help="the location to write the benchmark data to")
    args = parser.parse_args()

    config = BenchmarkConfig.default()
    create_benchmark_environment(args.root, config)


if __name__ == "__main__":
    main()
20 changes: 17 additions & 3 deletions tests/runner.py
@@ -122,7 +122,7 @@ def _pip_install_command(interpreter_path: Path) -> list[str]:


def _create_test_venv(python: Path, venv_dir: Path) -> VirtualEnv:
-   venv = VirtualEnv.new(venv_dir, python)
+   venv = VirtualEnv.create(venv_dir, python)
    log.info("installing test requirements into virtualenv")
    proc = subprocess.run(
        [
@@ -156,13 +156,22 @@ def _create_virtual_env_command(interpreter_path: Path, venv_path: Path) -> list[str]:
    return [str(interpreter_path), "-m", "venv", str(venv_path)]


def _install_into_virtual_env_command(interpreter_path: Path, package_path: Path) -> list[str]:
    # prefer uv when available since it installs packages much faster than pip
    if shutil.which("uv") is not None:
        log.info("using uv to install package as editable")
        return ["uv", "pip", "install", "--python", str(interpreter_path), "--editable", str(package_path)]
    else:
        log.info("using pip to install package as editable")
        return [str(interpreter_path), "-m", "pip", "install", "--editable", str(package_path)]


class VirtualEnv:
    def __init__(self, root: Path) -> None:
        self._root = root.resolve()
        self._is_windows = platform.system() == "Windows"

    @staticmethod
-   def new(root: Path, interpreter_path: Path) -> VirtualEnv:
+   def create(root: Path, interpreter_path: Path) -> VirtualEnv:
        if root.exists():
            log.info("removing virtualenv at %s", root)
            shutil.rmtree(root)
@@ -194,6 +203,11 @@ def interpreter_path(self) -> Path:
        assert interpreter.exists()
        return interpreter

    def install_editable_package(self, package_path: Path) -> None:
        cmd = _install_into_virtual_env_command(self.interpreter_path, package_path)
        proc = subprocess.run(cmd, capture_output=True, check=True)
        log.debug("%s", proc.stdout.decode())

    def activate(self, env: dict[str, str]) -> None:
        """set the environment as-if venv/bin/activate was run"""
        path = env.get("PATH", "").split(os.pathsep)
@@ -254,7 +268,7 @@ def main() -> None:
    parser.add_argument(
        "--name",
        default="Tests",
-       help="the name for the suite of tests this run (use to distinguish between OS/python version)",
+       help="the name to assign for the suite of tests this run (use to distinguish between OS/python version)",
    )

    parser.add_argument(
…
