From dc95f2e7edeb0984b5c3ef222f04339ba98951b1 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 6 Dec 2023 02:11:55 +0200 Subject: [PATCH] fs: system: optimize reflink 10000 reflinks now take about 1sec instead of 2.7sec --- noxfile.py | 12 ++++++++---- pyproject.toml | 3 +++ src/dvc_objects/fs/system.py | 15 ++++++++++++--- tests/benchmarks/test_fs.py | 12 +++++++++--- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/noxfile.py b/noxfile.py index 9775180..9cb13ff 100644 --- a/noxfile.py +++ b/noxfile.py @@ -8,10 +8,12 @@ nox.options.sessions = "lint", "tests" locations = "src", "tests" +pip_dev_flags = ["--use-pep517"] # reflink package is still missing wheels + @nox.session(python=["3.8", "3.9", "3.10", "3.11", "pypy3.8", "pypy3.9"]) def tests(session: nox.Session) -> None: - session.install(".[tests]") + session.install(".[tests]", *pip_dev_flags) session.run( "pytest", "--cov", @@ -24,7 +26,7 @@ def tests(session: nox.Session) -> None: @nox.session def lint(session: nox.Session) -> None: session.install("pre-commit") - session.install("-e", ".[dev]") + session.install("-e", ".[dev]", *pip_dev_flags) args = *(session.posargs or ("--show-diff-on-failure",)), "--all-files" session.run("pre-commit", "run", *args) @@ -34,7 +36,7 @@ def lint(session: nox.Session) -> None: @nox.session def safety(session: nox.Session) -> None: """Scan dependencies for insecure packages.""" - session.install(".[dev]") + session.install(".[dev]", *pip_dev_flags) session.install("safety") session.run("safety", "check", "--full-report") @@ -58,4 +60,6 @@ def dev(session: nox.Session) -> None: session.run("virtualenv", venv_dir, silent=True) python = os.path.join(venv_dir, "bin/python") - session.run(python, "-m", "pip", "install", "-e", ".[dev]", external=True) + session.run( + python, "-m", "pip", "install", "-e", ".[dev]", *pip_dev_flags, external=True + ) diff --git a/pyproject.toml b/pyproject.toml index 9468d4c..86f29cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ tests = [ "mypy==1.5.1", "pytest-servers[s3]==0.1.3", "pytest-benchmark", + "reflink", ] dev = [ "dvc-objects[tests]", @@ -99,6 +100,8 @@ ignore_missing_imports = true module = [ "fsspec.*", "funcy", + "reflink.*", + "shortuuid", ] [tool.codespell] diff --git a/src/dvc_objects/fs/system.py b/src/dvc_objects/fs/system.py index a86162d..9a17c19 100644 --- a/src/dvc_objects/fs/system.py +++ b/src/dvc_objects/fs/system.py @@ -4,6 +4,7 @@ import platform import stat import sys +import functools from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -36,7 +37,8 @@ def symlink(source: "AnyFSPath", link_name: "AnyFSPath") -> None: os.symlink(source, link_name) -def _reflink_darwin(src: "AnyFSPath", dst: "AnyFSPath") -> None: +@functools.lru_cache(maxsize=1) +def _clonefile(): import ctypes def _cdll(name): @@ -58,13 +60,20 @@ def _cdll(name): # NOTE: trying to bypass System Integrity Protection (SIP) clib = _cdll(LIBC_FALLBACK) - if not hasattr(clib, "clonefile"): + clonefile = getattr(clib, "clonefile", None) + if clonefile is None: raise OSError( errno.ENOTSUP, "'clonefile' not supported by the standard library", ) - clonefile = clib.clonefile + return clonefile + + +def _reflink_darwin(src: "AnyFSPath", dst: "AnyFSPath") -> None: + import ctypes + + clonefile = _clonefile() clonefile.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_int] clonefile.restype = ctypes.c_int diff --git a/tests/benchmarks/test_fs.py b/tests/benchmarks/test_fs.py index b7900a9..58e5aea 100644 --- a/tests/benchmarks/test_fs.py +++ b/tests/benchmarks/test_fs.py @@ -1,12 +1,16 @@ import errno import pytest import shutil +from reflink import reflink as pyreflink +from reflink.error import ReflinkImpossibleError from dvc_objects.fs.system import reflink, hardlink, symlink NLINKS = 10000 -@pytest.mark.parametrize("link", [reflink, hardlink, symlink]) +@pytest.mark.parametrize( + "link", [pytest.param(pyreflink, id="pyreflink"), reflink, hardlink, symlink] +) def test_link(benchmark, tmp_path, link): (tmp_path / "original").mkdir() @@ -28,8 +32,10 @@ def _link(): for idx in range(NLINKS): try: link(f"{original}/{idx}", f"{links}/{idx}") - except OSError as exc: - if exc.errno == errno.ENOTSUP: + except Exception as exc: + if isinstance(exc, (ReflinkImpossibleError, NotImplementedError)) or ( + isinstance(exc, OSError) and exc.errno == errno.ENOTSUP + ): pytest.skip(str(exc)) raise