Skip to content

Commit

Permalink
fetch: introduce --type metrics/plots
Browse files Browse the repository at this point in the history
Needed for https://github.com/iterative/studio/pull/6541 but is also generally useful.
  • Loading branch information
efiop committed Jul 9, 2023
1 parent 2ef2caa commit 119075f
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 0 deletions.
12 changes: 12 additions & 0 deletions dvc/commands/data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def run(self):
recursive=self.args.recursive,
run_cache=self.args.run_cache,
max_size=self.args.max_size,
types=self.args.types,
)
self.log_summary({"fetched": processed_files_count})
except DvcException:
Expand Down Expand Up @@ -328,6 +329,17 @@ def add_parser(subparsers, _parent_parser):
type=int,
help="Fetch data files/directories that are each below specified size (bytes).",
)
fetch_parser.add_argument(
"--type",
dest="types",
action="append",
default=[],
help=(
"Only Fetch data files/directories that are of a particular "
"type (metrics, plots)."
),
choices=["metrics", "plots"],
)
fetch_parser.set_defaults(func=CmdDataFetch)

# Status
Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def fetch( # noqa: C901, PLR0913
run_cache=False,
revs=None,
max_size=None,
types=None,
) -> int:
"""Download data items from a cloud and imported repositories
Expand Down Expand Up @@ -72,6 +73,7 @@ def fetch( # noqa: C901, PLR0913
with_deps=with_deps,
recursive=recursive,
max_size=max_size,
types=types,
)
index_keys.add(idx.data_tree.hash_info.value)
indexes.append(idx.data["repo"])
Expand Down
66 changes: 66 additions & 0 deletions dvc/repo/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,50 @@ def data_keys(self) -> Dict[str, Set["DataIndexKey"]]:

return dict(by_workspace)

@cached_property
def metric_keys(self) -> Dict[str, Set["DataIndexKey"]]:
    """Group the index keys of metric data by workspace.

    Two sources contribute keys:

    * every output in ``self.outs`` whose ``metric`` attribute is truthy,
      filed under the workspace given by its ``index_key``;
    * every path yielded by ``_collect_top_level_metrics(self.repo)``,
      converted with ``relparts`` against the repo root and filed under
      the ``"repo"`` workspace.

    Returns:
        A plain ``dict`` mapping workspace name to a set of keys. The
        ``"repo"`` entry is always present, even when it is empty.
    """
    from collections import defaultdict

    from .metrics.show import _collect_top_level_metrics

    # Seed "repo" up front so callers can rely on the entry existing.
    grouped: Dict[str, Set["DataIndexKey"]] = defaultdict(set, {"repo": set()})

    for out in self.outs:
        if out.metric:
            ws, out_key = out.index_key
            grouped[ws].add(out_key)

    # Top-level metrics are plain paths; turn them into keys relative to
    # the repository root before recording them.
    grouped["repo"].update(
        self.repo.fs.path.relparts(metric_path, self.repo.root_dir)
        for metric_path in _collect_top_level_metrics(self.repo)
    )

    return dict(grouped)

@cached_property
def plot_keys(self) -> Dict[str, Set["DataIndexKey"]]:
    """Group the index keys of plot data by workspace.

    Two sources contribute keys:

    * every output in ``self.outs`` whose ``plot`` attribute is truthy,
      filed under the workspace given by its ``index_key``;
    * every path in ``self._plot_sources``, converted with ``relparts``
      against the repo root and filed under the ``"repo"`` workspace.

    Returns:
        A plain ``dict`` mapping workspace name to a set of keys. The
        ``"repo"`` entry is always present, even when it is empty.
    """
    from collections import defaultdict

    # Seed "repo" up front so callers can rely on the entry existing.
    grouped: Dict[str, Set["DataIndexKey"]] = defaultdict(set, {"repo": set()})

    for out in self.outs:
        if out.plot:
            ws, out_key = out.index_key
            grouped[ws].add(out_key)

    # Plot sources are plain paths; turn them into keys relative to the
    # repository root before recording them.
    grouped["repo"].update(
        self.repo.fs.path.relparts(source, self.repo.root_dir)
        for source in self._plot_sources
    )

    return dict(grouped)

@cached_property
def data_tree(self):
from dvc_data.hashfile.tree import Tree
Expand Down Expand Up @@ -487,12 +531,31 @@ def used_objs(
used[odb].update(objs)
return used

def _types_filter(self, types, out):
    """Tell whether ``out`` overlaps any key of the requested types.

    ``out.index_key`` is unpacked into a workspace and a key. For each
    entry of ``types``, the matching key mapping is looked up
    (``self.plot_keys`` for ``"plots"``, ``self.metric_keys`` for
    ``"metrics"``) and the output's key is compared with every key stored
    for the same workspace. Two keys overlap when one is a prefix of the
    other (equal keys included).

    Args:
        types: iterable of type names, each ``"plots"`` or ``"metrics"``.
        out: object exposing ``index_key`` as a ``(workspace, key)`` pair.

    Returns:
        ``True`` on the first overlap found, ``False`` if there is none.

    Raises:
        ValueError: when an entry of ``types`` is not a supported name.
            Types are checked lazily, so an unsupported entry that comes
            after a matching one is never reached.
    """
    ws, okey = out.index_key

    for typ in types:
        # Resolve the mapping lazily: each property is only touched when
        # its type was actually requested.
        if typ == "plots":
            candidates = self.plot_keys
        elif typ == "metrics":
            candidates = self.metric_keys
        else:
            raise ValueError(f"unsupported type {typ}")

        for key in candidates.get(ws, ()):
            # One key is a prefix of the other exactly when both agree
            # over the length of the shorter one.
            overlap = min(len(key), len(okey))
            if key[:overlap] == okey[:overlap]:
                return True

    return False

def targets_view(
self,
targets: Optional["TargetType"],
stage_filter: Optional[Callable[["Stage"], bool]] = None,
outs_filter: Optional[Callable[["Output"], bool]] = None,
max_size: Optional[int] = None,
types: Optional[List[str]] = None,
**kwargs: Any,
) -> "IndexView":
"""Return read-only view of index for the specified targets.
Expand Down Expand Up @@ -520,6 +583,9 @@ def _outs_filter(out):
if max_size and out.meta and out.meta.size and out.meta.size >= max_size:
return False

if types and not self._types_filter(types, out):
return False

if outs_filter:
return outs_filter(out)

Expand Down

0 comments on commit 119075f

Please sign in to comment.