Skip to content

Commit

Permalink
dvc: don't use realpath where not needed
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Jul 29, 2023
1 parent ca0ad23 commit a133991
Show file tree
Hide file tree
Showing 10 changed files with 47 additions and 25 deletions.
2 changes: 1 addition & 1 deletion dvc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def __init__(
self.fs = fs or self.wfs

if dvc_dir:
self.dvc_dir = self.fs.path.abspath(self.fs.path.realpath(dvc_dir))
self.dvc_dir = self.fs.path.abspath(dvc_dir)

self.load(
validate=validate, config=config, remote=remote, remote_config=remote_config
Expand Down
4 changes: 4 additions & 0 deletions dvc/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def __init__(self, output, repo=None):
)


class StageNotFoundError(DvcException):
pass


class StagePathAsOutputError(DvcException):
"""Thrown if directory that stage is going to be saved in is specified as
an output of another stage.
Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def find_root(cls, root=None, fs=None) -> str:

fs = fs or localfs
root = root or os.curdir
root_dir = fs.path.realpath(root)
root_dir = fs.path.abspath(root)

if not fs.isdir(root_dir):
raise NotDvcRepoError(f"directory '{root}' does not exist")
Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/experiments/executor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,7 +607,7 @@ def _repro_dvc( # noqa: C901
old_cwd = os.getcwd()

for path in copy_paths or []:
cls._copy_path(os.path.realpath(path), os.path.join(dvc.root_dir, path))
cls._copy_path(os.path.abspath(path), os.path.join(dvc.root_dir, path))

if info.wdir:
os.chdir(os.path.join(dvc.scm.root_dir, info.wdir))
Expand Down
6 changes: 6 additions & 0 deletions dvc/repo/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def check_acyclic(graph: "DiGraph") -> None:

def get_pipeline(pipelines, node):
found = [i for i in pipelines if i.has_node(node)]
if not found:
return None

assert len(found) == 1
return found[0]

Expand Down Expand Up @@ -60,6 +63,9 @@ def collect_pipeline(stage: "Stage", graph: "DiGraph") -> Iterator["Stage"]:
import networkx as nx

pipeline = get_pipeline(get_pipelines(graph), stage)
if not pipeline:
return iter([])

return nx.dfs_postorder_nodes(pipeline, stage)


Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): # noqa:
"Cannot initialize repo with `--no-scm` and `--subdir`"
)

root_dir = os.path.realpath(root_dir)
root_dir = os.path.abspath(root_dir)
dvc_dir = os.path.join(root_dir, Repo.DVC_DIR)

try:
Expand Down
12 changes: 10 additions & 2 deletions dvc/repo/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,19 @@ class StageInfo(NamedTuple):


def _collect_with_deps(stages: StageList, graph: "DiGraph") -> StageSet:
from dvc.exceptions import StageNotFoundError
from dvc.repo.graph import collect_pipeline

res: StageSet = set()
for stage in stages:
res.update(collect_pipeline(stage, graph=graph))
pl = list(collect_pipeline(stage, graph=graph))
if not pl:
raise StageNotFoundError(
f"Stage {stage} is not found in the project. "
"Check that there are no symlinks in the parents "
"leading up to it within the project."
)
res.update(pl)
return res


Expand Down Expand Up @@ -211,7 +219,7 @@ def _get_filepath(
self, path: Optional[str] = None, name: Optional[str] = None
) -> str:
if path:
return self.repo.fs.path.realpath(path)
return self.repo.fs.path.abspath(path)

path = PROJECT_FILE
logger.debug("Assuming '%s' to be a stage inside '%s'", name, path)
Expand Down
4 changes: 2 additions & 2 deletions dvc/stage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ def check_stage_path(repo, path, is_wdir=False):
wdir_or_path="stage working dir" if is_wdir else "file path", path=path
)

real_path = os.path.realpath(path)
real_path = os.path.abspath(path)
if not os.path.exists(real_path):
raise StagePathNotFoundError(error_msg.format("does not exist"))

if not os.path.isdir(real_path):
raise StagePathNotDirectoryError(error_msg.format("is not directory"))

proj_dir = os.path.realpath(repo.root_dir)
proj_dir = os.path.abspath(repo.root_dir)
if real_path != proj_dir and not path_isin(real_path, proj_dir):
raise StagePathOutsideError(error_msg.format("is outside of DVC repo"))

Expand Down
16 changes: 3 additions & 13 deletions dvc/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,19 +222,9 @@ def relpath(path, start=os.curdir):
start = os.path.abspath(os.fspath(start))

# Windows path on different drive than curdir doesn't have relpath
if os.name == "nt":
# Since python 3.8 os.realpath resolves network shares to their UNC
# path. So, to be certain that relative paths correctly captured,
# we need to resolve to UNC path first. We resolve only the drive
# name so that we don't follow any 'real' symlinks on the path
def resolve_network_drive_windows(path_to_resolve):
drive, tail = os.path.splitdrive(path_to_resolve)
return os.path.join(os.path.realpath(drive), tail)

path = resolve_network_drive_windows(os.path.abspath(path))
start = resolve_network_drive_windows(start)
if not os.path.commonprefix([start, path]):
return path
if os.name == "nt" and not os.path.commonprefix([start, path]):
return path

return os.path.relpath(path, start)


Expand Down
22 changes: 18 additions & 4 deletions tests/func/test_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,17 +230,31 @@ def test_parent_repo_collect_stages(tmp_dir, scm, dvc):

@pytest.mark.parametrize("with_deps", (False, True))
def test_collect_symlink(tmp_dir, dvc, with_deps):
from dvc.exceptions import StageNotFoundError

tmp_dir.gen({"data": {"foo": "foo contents"}})
foo_path = os.path.join("data", "foo")
dvc.add(foo_path)

data_link = tmp_dir / "data_link"
data_link.symlink_to("data")
stage = list(
dvc.stage.collect(target=str(data_link / "foo.dvc"), with_deps=with_deps)
)[0]

assert stage.addressing == f"{foo_path}.dvc"
if with_deps:
# NOTE: with_deps means that we'll need to collect and use dvcfiles in the repo
# and we currently don't follow symlinks when collecting those, so it will not
# be able to find the target stage.
with pytest.raises(StageNotFoundError):
dvc.stage.collect(target=str(data_link / "foo.dvc"), with_deps=with_deps)
else:
stage = list(
dvc.stage.collect(target=str(data_link / "foo.dvc"), with_deps=with_deps)
)[0]

assert stage.addressing == os.path.join("data_link", "foo.dvc")

stage = list(dvc.stage.collect(target=f"{foo_path}.dvc", with_deps=with_deps))[0]

assert stage.addressing == os.path.join("data", "foo.dvc")


def test_stage_strings_representation(tmp_dir, dvc, run_copy):
Expand Down

0 comments on commit a133991

Please sign in to comment.