Skip to content

Commit

Permalink
fs: system: optimize reflink
Browse files: browse the repository at this point in the history
10,000 reflinks now take about 1 s instead of 2.7 s.
  • Loading branch information
efiop committed Dec 6, 2023
1 parent 8740236 commit dc95f2e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 10 deletions.
12 changes: 8 additions & 4 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
nox.options.sessions = "lint", "tests"
locations = "src", "tests"

pip_dev_flags = ["--use-pep517"] # reflink package is still missing wheels


@nox.session(python=["3.8", "3.9", "3.10", "3.11", "pypy3.8", "pypy3.9"])
def tests(session: nox.Session) -> None:
session.install(".[tests]")
session.install(".[tests]", *pip_dev_flags)
session.run(
"pytest",
"--cov",
Expand All @@ -24,7 +26,7 @@ def tests(session: nox.Session) -> None:
@nox.session
def lint(session: nox.Session) -> None:
session.install("pre-commit")
session.install("-e", ".[dev]")
session.install("-e", ".[dev]", *pip_dev_flags)

args = *(session.posargs or ("--show-diff-on-failure",)), "--all-files"
session.run("pre-commit", "run", *args)
Expand All @@ -34,7 +36,7 @@ def lint(session: nox.Session) -> None:
@nox.session
def safety(session: nox.Session) -> None:
"""Scan dependencies for insecure packages."""
session.install(".[dev]")
session.install(".[dev]", *pip_dev_flags)
session.install("safety")
session.run("safety", "check", "--full-report")

Expand All @@ -58,4 +60,6 @@ def dev(session: nox.Session) -> None:
session.run("virtualenv", venv_dir, silent=True)

python = os.path.join(venv_dir, "bin/python")
session.run(python, "-m", "pip", "install", "-e", ".[dev]", external=True)
session.run(
python, "-m", "pip", "install", "-e", ".[dev]", *pip_dev_flags, external=True
)
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ tests = [
"mypy==1.5.1",
"pytest-servers[s3]==0.1.3",
"pytest-benchmark",
"reflink",
]
dev = [
"dvc-objects[tests]",
Expand Down Expand Up @@ -99,6 +100,8 @@ ignore_missing_imports = true
module = [
"fsspec.*",
"funcy",
"reflink.*",
"shortuuid",
]

[tool.codespell]
Expand Down
15 changes: 12 additions & 3 deletions src/dvc_objects/fs/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import platform
import stat
import sys
import functools
from typing import TYPE_CHECKING

if TYPE_CHECKING:
Expand Down Expand Up @@ -36,7 +37,8 @@ def symlink(source: "AnyFSPath", link_name: "AnyFSPath") -> None:
os.symlink(source, link_name)


def _reflink_darwin(src: "AnyFSPath", dst: "AnyFSPath") -> None:
@functools.lru_cache(maxsize=1)
def _clonefile():
import ctypes

def _cdll(name):
Expand All @@ -58,13 +60,20 @@ def _cdll(name):
# NOTE: trying to bypass System Integrity Protection (SIP)
clib = _cdll(LIBC_FALLBACK)

if not hasattr(clib, "clonefile"):
clonefile = getattr(clib, "clonefile", None)
if clonefile is None:
raise OSError(
errno.ENOTSUP,
"'clonefile' not supported by the standard library",
)

clonefile = clib.clonefile
return clonefile


def _reflink_darwin(src: "AnyFSPath", dst: "AnyFSPath") -> None:
import ctypes

clonefile = _clonefile()
clonefile.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_int]
clonefile.restype = ctypes.c_int

Expand Down
12 changes: 9 additions & 3 deletions tests/benchmarks/test_fs.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import errno
import pytest
import shutil
from reflink import reflink as pyreflink
from reflink.error import ReflinkImpossibleError
from dvc_objects.fs.system import reflink, hardlink, symlink

NLINKS = 10000


@pytest.mark.parametrize("link", [reflink, hardlink, symlink])
@pytest.mark.parametrize(
"link", [pytest.param(pyreflink, id="pyreflink"), reflink, hardlink, symlink]
)
def test_link(benchmark, tmp_path, link):
(tmp_path / "original").mkdir()

Expand All @@ -28,8 +32,10 @@ def _link():
for idx in range(NLINKS):
try:
link(f"{original}/{idx}", f"{links}/{idx}")
except OSError as exc:
if exc.errno == errno.ENOTSUP:
except Exception as exc:
if isinstance(exc, (ReflinkImpossibleError, NotImplementedError)) or (
isinstance(exc, OSError) and exc.errno == errno.ENOTSUP
):
pytest.skip(str(exc))
raise

Expand Down

0 comments on commit dc95f2e

Please sign in to comment.