Skip to content

Commit

Permalink
repo: add API and CLI for reading artifacts (#9770)
Browse files Browse the repository at this point in the history
* deps: add GTO as a dependency

* deps: bump dvc-studio-client to 0.13.0

* fs: return downloaded count in fs.download()

* utils: add type hints for resolve_output

* artifacts: add `dvc artifacts get`

* api: add `api.artifacts_show`

* artifacts get: support --remote, --remote-config

* gto: update to 1.3.0
  • Loading branch information
pmrowla authored Sep 25, 2023
1 parent e8362db commit 12b5725
Show file tree
Hide file tree
Showing 10 changed files with 519 additions and 34 deletions.
2 changes: 2 additions & 0 deletions dvc/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dvc.fs.dvc import _DVCFileSystem as DVCFileSystem

from .artifacts import artifacts_show
from .data import open # pylint: disable=redefined-builtin
from .data import get_url, read
from .experiments import exp_save, exp_show
Expand All @@ -10,6 +11,7 @@
"all_branches",
"all_commits",
"all_tags",
"artifacts_show",
"exp_save",
"exp_show",
"get_url",
Expand Down
47 changes: 47 additions & 0 deletions dvc/api/artifacts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from typing import Any, Dict, Optional

from dvc.repo import Repo


def artifacts_show(
    name: str,
    version: Optional[str] = None,
    stage: Optional[str] = None,
    repo: Optional[str] = None,
) -> Dict[str, str]:
    """
    Return path and Git revision for an artifact in a DVC project.

    The resulting path and revision can be used in conjunction with other
    dvc.api calls to open and read the artifact.

    Args:
        name (str): name of the artifact to open.
        version (str, optional): version of the artifact to open. Defaults to
            the latest version.
        stage (str, optional): name of the model registry stage.
        repo (str, optional): path or URL for the DVC repo.

    Returns:
        Dictionary of the form:
            {
                "rev": ...,
                "path": ...,
            }

    Raises:
        ValueError: Both ``version`` and ``stage`` were given.
        dvc.exceptions.ArtifactNotFoundError: The specified artifact was not
            found in the repo.
    """
    # A version and a registry stage each select a revision on their own,
    # so accepting both would be ambiguous.
    if version and stage:
        raise ValueError("Artifact version and stage are mutually exclusive.")

    with Repo.open(repo, subrepos=True, uninitialized=True) as dvc_repo:
        resolved_rev = dvc_repo.artifacts.get_rev(
            name, version=version, stage=stage
        )
        # The artifact's path is looked up with the repo switched to the
        # resolved revision.
        with dvc_repo.switch(resolved_rev):
            artifact_path = dvc_repo.artifacts.get_path(name)

    return {"rev": resolved_rev, "path": artifact_path}
2 changes: 2 additions & 0 deletions dvc/cli/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from dvc import __version__
from dvc.commands import (
add,
artifacts,
cache,
check_ignore,
checkout,
Expand Down Expand Up @@ -89,6 +90,7 @@
experiments,
check_ignore,
data,
artifacts,
]


Expand Down
133 changes: 133 additions & 0 deletions dvc/commands/artifacts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import argparse
import logging

from dvc.cli import completion
from dvc.cli.command import CmdBaseNoRepo
from dvc.cli.utils import DictAction, append_doc_link, fix_subparsers
from dvc.exceptions import DvcException

logger = logging.getLogger(__name__)


class CmdArtifactsGet(CmdBaseNoRepo):
    """Command handler behind `dvc artifacts get`."""

    def run(self):
        """Download the requested artifact and report the result.

        Returns:
            0 when the download succeeds, 1 when a CloneError or
            DvcException is raised (the exception is logged).
        """
        # Imported lazily, as in the rest of this handler's dependencies.
        from dvc.repo.artifacts import Artifacts
        from dvc.scm import CloneError
        from dvc.ui import ui

        args = self.args
        # NOTE(review): the parser defines -j/--jobs, but it is not forwarded
        # to Artifacts.get here — confirm whether that is intentional.
        try:
            downloaded, dest = Artifacts.get(
                args.url,
                name=args.name,
                version=args.rev,
                stage=args.stage,
                force=args.force,
                config=args.config,
                remote=args.remote,
                remote_config=args.remote_config,
                out=args.out,
            )
            ui.write(f"Downloaded {downloaded} file(s) to '{dest}'")
        except CloneError:
            logger.exception("failed to get '%s'", args.name)
            return 1
        except DvcException:
            logger.exception("failed to get '%s' from '%s'", args.name, args.url)
            return 1
        return 0


def add_parser(subparsers, parent_parser):
    """Register the `dvc artifacts` command group and its `get` sub-command.

    Args:
        subparsers: the argparse sub-parsers action of the top-level CLI
            parser; the `artifacts` parser is added to it.
        parent_parser: parser whose arguments are inherited by both the
            `artifacts` parser and the `artifacts get` parser.
    """
    ARTIFACTS_HELP = "DVC model registry artifact commands."

    artifacts_parser = subparsers.add_parser(
        "artifacts",
        parents=[parent_parser],
        description=append_doc_link(ARTIFACTS_HELP, "artifacts"),
        help=ARTIFACTS_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    artifacts_subparsers = artifacts_parser.add_subparsers(
        dest="cmd",
        help="Use `dvc artifacts CMD --help` to display command-specific help.",
    )
    fix_subparsers(artifacts_subparsers)

    ARTIFACTS_GET_HELP = "Download an artifact from a DVC project."
    get_parser = artifacts_subparsers.add_parser(
        "get",
        parents=[parent_parser],
        description=append_doc_link(ARTIFACTS_GET_HELP, "artifacts/get"),
        # The short help listed under `dvc artifacts --help` must describe
        # this sub-command, not the whole command group.
        help=ARTIFACTS_GET_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    get_parser.add_argument("url", help="Location of DVC repository to download from")
    get_parser.add_argument(
        "name", help="Name of artifact in the repository"
    ).complete = completion.FILE
    get_parser.add_argument(
        "--rev",
        nargs="?",
        help="Artifact version",
        metavar="<version>",
    )
    get_parser.add_argument(
        "--stage",
        nargs="?",
        help="Artifact stage",
        metavar="<stage>",
    )
    get_parser.add_argument(
        "-o",
        "--out",
        nargs="?",
        help="Destination path to download artifact to",
        metavar="<path>",
    ).complete = completion.DIR
    get_parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        help=(
            "Number of jobs to run simultaneously. "
            "The default value is 4 * cpu_count(). "
        ),
        metavar="<number>",
    )
    get_parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        help="Override local file or folder if exists.",
    )
    get_parser.add_argument(
        "--config",
        type=str,
        help=(
            "Path to a config file that will be merged with the config "
            "in the target repository."
        ),
    )
    get_parser.add_argument(
        "--remote",
        type=str,
        help=(
            "Remote name to set as a default in the target repository "
            "(only applicable when downloading from DVC remote)."
        ),
    )
    get_parser.add_argument(
        "--remote-config",
        type=str,
        nargs="*",
        action=DictAction,
        help=(
            "Remote config options to merge with a remote's config (default or one "
            "specified by '--remote') in the target repository (only applicable "
            "when downloading from DVC remote)."
        ),
    )
    # The CLI dispatches to the class stored under `func`.
    get_parser.set_defaults(func=CmdArtifactsGet)
23 changes: 22 additions & 1 deletion dvc/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Exceptions raised by the dvc."""
import errno
from typing import TYPE_CHECKING, Dict, List, Set
from typing import TYPE_CHECKING, Dict, List, Optional, Set

from dvc.utils import format_link

Expand Down Expand Up @@ -336,3 +336,24 @@ def __init__(self, fs_paths):
class PrettyDvcException(DvcException):
def __pretty_exc__(self, **kwargs):
"""Print prettier exception message."""


class ArtifactNotFoundError(DvcException):
    """Thrown if an artifact is not found in the DVC repo.

    Args:
        name (str): artifact name.
        version (str, optional): artifact version that was requested.
        stage (str, optional): artifact stage that was requested.
    """

    def __init__(
        self,
        name: str,
        version: Optional[str] = None,
        stage: Optional[str] = None,
    ):
        self.name = name
        self.version = version
        self.stage = stage

        # The message mentions the stage if one was given, otherwise the
        # version; with neither, only the artifact name is shown.
        qualifier = stage or version
        suffix = f" @ {qualifier}" if qualifier else ""
        super().__init__(f"Unable to find artifact '{name}{suffix}'")
8 changes: 6 additions & 2 deletions dvc/fs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@
# pylint: enable=unused-import


def download(fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None):
def download(
fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
) -> int:
with Callback.as_tqdm_callback(
desc=f"Downloading {fs.path.name(fs_path)}",
unit="files",
Expand All @@ -63,7 +65,8 @@ def download(fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
if not path.endswith(fs.path.flavour.sep)
]
if not from_infos:
return localfs.makedirs(to, exist_ok=True)
localfs.makedirs(to, exist_ok=True)
return 0
to_infos = [
localfs.path.join(to, *fs.path.relparts(info, fs_path))
for info in from_infos
Expand All @@ -82,6 +85,7 @@ def download(fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
callback=cb,
batch_size=jobs,
)
return len(to_infos)


def parse_external_url(url, config=None):
Expand Down
Loading

0 comments on commit 12b5725

Please sign in to comment.