From 5a44dbd15a903fc20fdc8d6a3be6d61631b9b2d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?= =?UTF-8?q?=E0=A4=A4=29?= Date: Thu, 22 Aug 2024 14:37:40 +0545 Subject: [PATCH] get/import: use in-memory index for remote repositories --- dvc/repo/__init__.py | 3 +++ dvc/repo/open_repo.py | 4 +++- dvc/testing/benchmarks/cli/commands/test_get.py | 2 +- dvc/testing/benchmarks/cli/commands/test_import.py | 3 +-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 2b89626972..830a5b33c8 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -232,6 +232,7 @@ def __init__( # noqa: PLR0915, PLR0913 Callable[[str, Exception], None] ] = None self._lock_depth: int = 0 + self._is_remote = False def __str__(self): return self.url or self.root_dir @@ -365,6 +366,8 @@ def data_index(self) -> "DataIndex": index_dir = os.path.join(self.site_cache_dir, "index", "data") os.makedirs(index_dir, exist_ok=True) self._data_index = DataIndex.open(os.path.join(index_dir, "db.db")) + if self._is_remote: + self._data_index = DataIndex() return self._data_index diff --git a/dvc/repo/open_repo.py b/dvc/repo/open_repo.py index 43efce100a..d34125963a 100644 --- a/dvc/repo/open_repo.py +++ b/dvc/repo/open_repo.py @@ -40,7 +40,9 @@ def _external_repo(url, rev: Optional[str] = None, **kwargs) -> "Repo": **kwargs, ) - return Repo(**repo_kwargs) + repo = Repo(**repo_kwargs) + repo._is_remote = True + return repo def open_repo(url, *args, **kwargs): diff --git a/dvc/testing/benchmarks/cli/commands/test_get.py b/dvc/testing/benchmarks/cli/commands/test_get.py index be5769e18b..965c0fb89e 100644 --- a/dvc/testing/benchmarks/cli/commands/test_get.py +++ b/dvc/testing/benchmarks/cli/commands/test_get.py @@ -6,4 +6,4 @@ def test_get(bench_dvc, tmp_dir, scm, dvc, make_dataset, remote): dataset = make_dataset( cache=False, files=False, dvcfile=True, commit=True, remote=True ) - bench_dvc("get", tmp_dir, dataset.name, "-o", "new") + bench_dvc("get", f"file://{tmp_dir.as_posix()}", dataset.name, "-o", "new") diff --git a/dvc/testing/benchmarks/cli/commands/test_import.py b/dvc/testing/benchmarks/cli/commands/test_import.py index 4884bed4b6..35ef7aca8c 100644 --- a/dvc/testing/benchmarks/cli/commands/test_import.py +++ b/dvc/testing/benchmarks/cli/commands/test_import.py @@ -1,7 +1,6 @@ import pytest -@pytest.mark.flaky(reruns=3) @pytest.mark.requires( "!=3.53.*,!=3.54.0", reason="Takes 10 mins to run. Regression in 3.53.0, fixed in 3.54.1", @@ -12,4 +11,4 @@ def test_import(bench_dvc, tmp_dir, scm, dvc, make_dataset, remote): dataset = make_dataset( cache=False, files=False, dvcfile=True, commit=True, remote=True ) - bench_dvc("import", tmp_dir, dataset.name, "-o", "new") + bench_dvc("import", f"file://{tmp_dir.as_posix()}", dataset.name, "-o", "new")