Skip to content

Commit

Permalink
save import hash info to state
Browse files: browse the repository at this point in the history
  • Loading branch information
Dave Berenbaum committed Aug 19, 2024
1 parent e42d094 commit 03c8959
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 32 deletions.
37 changes: 13 additions & 24 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import voluptuous as vol

from dvc.prompt import confirm
from dvc.utils import as_posix

from .base import Dependency
Expand Down Expand Up @@ -94,29 +93,19 @@ def dumpd(self, **kwargs) -> dict[str, Union[str, dict[str, str]]]:
}

def download(self, to: "Output", jobs: Optional[int] = None):
from dvc_data.hashfile.build import build
from dvc_data.hashfile.checkout import CheckoutError, checkout

try:
repo = self._make_fs(locked=True).repo

_, _, obj = build(
repo.cache.local,
self.fs_path,
repo.dvcfs,
repo.cache.local.fs.PARAM_CHECKSUM,
)
checkout(
to.fs_path,
to.fs,
obj,
self.repo.cache.local,
ignore=None,
state=self.repo.state,
prompt=confirm,
)
except (CheckoutError, FileNotFoundError):
super().download(to=to, jobs=jobs)
super().download(to=to, jobs=jobs)

# Save the hash info already recorded in the source's DVC entries to the output's cache state.
fs_info = self.fs.info(self.fs_path)
if fs_info["type"] == "directory":
for _, _, files in self.fs.walk(self.fs_path, detail=True):
for file, file_info in files.items():
path = f"{to.fs_path}{to.fs.sep}{file}"
hash_info = file_info["dvc_info"]["entry"].hash_info
to.cache.state.save(path, to.fs, hash_info)
else:
hash_info = fs_info["dvc_info"]["entry"].hash_info
to.cache.state.save(to.fs_path, to.fs, hash_info)

def update(self, rev: Optional[str] = None):
if rev:
Expand Down
14 changes: 6 additions & 8 deletions tests/func/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

from dvc.cachemgr import CacheManager
from dvc.config import NoRemoteError
from dvc.dependency import base
from dvc.dvcfile import load_file
from dvc.fs import system
from dvc.scm import Git
from dvc.stage.exceptions import StagePathNotFoundError
from dvc.testing.tmp_dir import make_subrepo
from dvc.utils.fs import remove
from dvc_data.hashfile import hash
from dvc_data.index.index import DataIndexDirError


Expand Down Expand Up @@ -725,14 +725,12 @@ def test_import_invalid_configs(tmp_dir, scm, dvc, erepo_dir):
)


def test_reimport(tmp_dir, scm, dvc, erepo_dir, mocker):
def test_import_no_hash(tmp_dir, scm, dvc, erepo_dir, mocker):
with erepo_dir.chdir():
erepo_dir.dvc_gen("foo", "foo content", commit="create foo")

spy = mocker.spy(base, "fs_download")
spy = mocker.spy(hash, "file_md5")
dvc.imp(os.fspath(erepo_dir), "foo", "foo_imported")
assert spy.called

spy.reset_mock()
dvc.imp(os.fspath(erepo_dir), "foo", "foo_imported", force=True)
assert not spy.called
out_path = (tmp_dir / "foo_imported").as_posix()
for call in spy.call_args_list:
assert out_path != call.args[0]

0 comments on commit 03c8959

Please sign in to comment.