Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

repo: add API and CLI for reading artifacts #9770

Merged
merged 8 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dvc/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dvc.fs.dvc import _DVCFileSystem as DVCFileSystem

from .artifacts import artifacts_show
from .data import open # pylint: disable=redefined-builtin
from .data import get_url, read
from .experiments import exp_save, exp_show
Expand All @@ -10,6 +11,7 @@
"all_branches",
"all_commits",
"all_tags",
"artifacts_show",
"exp_save",
"exp_show",
"get_url",
Expand Down
47 changes: 47 additions & 0 deletions dvc/api/artifacts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from typing import Any, Dict, Optional

from dvc.repo import Repo


def artifacts_show(
    name: str,
    version: Optional[str] = None,
    stage: Optional[str] = None,
    repo: Optional[str] = None,
) -> Dict[str, str]:
    """
    Return path and Git revision for an artifact in a DVC project.

    The resulting path and revision can be used in conjunction with other
    dvc.api calls to open and read the artifact.

    Args:
        name (str): name of the artifact to look up.
        version (str, optional): version of the artifact. Defaults to the
            latest version. Mutually exclusive with ``stage``.
        stage (str, optional): name of the model registry stage. Mutually
            exclusive with ``version``.
        repo (str, optional): path or URL for the DVC repo.

    Returns:
        Dictionary of the form:
            {
                "rev": ...,
                "path": ...,
            }

    Raises:
        ValueError: Both ``version`` and ``stage`` were given.
        dvc.exceptions.ArtifactNotFoundError: The specified artifact was not
            found in the repo.
    """
    # Reject ambiguous requests before opening the repo.
    # NOTE(review): per the review discussion on this change, the underlying
    # GTO lookup reportedly does not enforce this itself -- confirm.
    if version and stage:
        raise ValueError("Artifact version and stage are mutually exclusive.")

    with Repo.open(repo, subrepos=True, uninitialized=True) as dvc_repo:
        revision = dvc_repo.artifacts.get_rev(name, version=version, stage=stage)
        # The artifact path is resolved with the repo switched to the
        # revision that was just looked up.
        with dvc_repo.switch(revision):
            artifact_path = dvc_repo.artifacts.get_path(name)
        return {"rev": revision, "path": artifact_path}
2 changes: 2 additions & 0 deletions dvc/cli/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from dvc import __version__
from dvc.commands import (
add,
artifacts,
cache,
check_ignore,
checkout,
Expand Down Expand Up @@ -89,6 +90,7 @@
experiments,
check_ignore,
data,
artifacts,
]


Expand Down
133 changes: 133 additions & 0 deletions dvc/commands/artifacts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import argparse
import logging

from dvc.cli import completion
from dvc.cli.command import CmdBaseNoRepo
from dvc.cli.utils import DictAction, append_doc_link, fix_subparsers
from dvc.exceptions import DvcException

logger = logging.getLogger(__name__)


class CmdArtifactsGet(CmdBaseNoRepo):
    """CLI command that downloads a named artifact from a DVC repository."""

    def run(self):
        """Download the requested artifact and report how many files arrived.

        Returns:
            int: 0 on success; 1 when a ``CloneError`` or any other
            ``DvcException`` is raised (the failure is logged with traceback).
        """
        from dvc.repo.artifacts import Artifacts
        from dvc.scm import CloneError
        from dvc.ui import ui

        args = self.args
        try:
            # NOTE(review): the parser also defines -j/--jobs, but it is not
            # forwarded here -- confirm whether Artifacts.get should receive it.
            get_kwargs = {
                "name": args.name,
                # The CLI flag is --rev, but it carries the artifact version.
                "version": args.rev,
                "stage": args.stage,
                "force": args.force,
                "config": args.config,
                "remote": args.remote,
                "remote_config": args.remote_config,
                "out": args.out,
            }
            num_files, destination = Artifacts.get(args.url, **get_kwargs)
            ui.write(f"Downloaded {num_files} file(s) to '{destination}'")
            return 0
        except CloneError:
            logger.exception("failed to get '%s'", args.name)
            return 1
        except DvcException:
            logger.exception("failed to get '%s' from '%s'", args.name, args.url)
            return 1


def add_parser(subparsers, parent_parser):
    """Register the ``dvc artifacts`` command group and its ``get`` subcommand.

    Args:
        subparsers: argparse subparsers action of the top-level parser; the
            ``artifacts`` parser is added to it.
        parent_parser: parser whose arguments are inherited by both the
            ``artifacts`` group parser and the ``get`` parser.
    """
    ARTIFACTS_HELP = "DVC model registry artifact commands."

    artifacts_parser = subparsers.add_parser(
        "artifacts",
        parents=[parent_parser],
        description=append_doc_link(ARTIFACTS_HELP, "artifacts"),
        help=ARTIFACTS_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    artifacts_subparsers = artifacts_parser.add_subparsers(
        dest="cmd",
        help="Use `dvc artifacts CMD --help` to display command-specific help.",
    )
    fix_subparsers(artifacts_subparsers)

    ARTIFACTS_GET_HELP = "Download an artifact from a DVC project."
    get_parser = artifacts_subparsers.add_parser(
        "get",
        parents=[parent_parser],
        description=append_doc_link(ARTIFACTS_GET_HELP, "artifacts/get"),
        # Use the command-specific summary so `dvc artifacts --help` describes
        # `get` itself rather than repeating the group-level help text.
        help=ARTIFACTS_GET_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    get_parser.add_argument("url", help="Location of DVC repository to download from")
    get_parser.add_argument(
        "name", help="Name of artifact in the repository"
    ).complete = completion.FILE
    # --version cannot be used here because it conflicts with the global DVC
    # version flag, so the artifact version is passed as --rev.
    get_parser.add_argument(
        "--rev",
        nargs="?",
        help="Artifact version",
        metavar="<version>",
    )
    get_parser.add_argument(
        "--stage",
        nargs="?",
        help="Artifact stage",
        metavar="<stage>",
    )
    get_parser.add_argument(
        "-o",
        "--out",
        nargs="?",
        help="Destination path to download artifact to",
        metavar="<path>",
    ).complete = completion.DIR
    get_parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        help=(
            "Number of jobs to run simultaneously. "
            "The default value is 4 * cpu_count(). "
        ),
        metavar="<number>",
    )
    get_parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        help="Override local file or folder if exists.",
    )
    get_parser.add_argument(
        "--config",
        type=str,
        help=(
            "Path to a config file that will be merged with the config "
            "in the target repository."
        ),
    )
    get_parser.add_argument(
        "--remote",
        type=str,
        help=(
            "Remote name to set as a default in the target repository "
            "(only applicable when downloading from DVC remote)."
        ),
    )
    get_parser.add_argument(
        "--remote-config",
        type=str,
        nargs="*",
        action=DictAction,
        help=(
            "Remote config options to merge with a remote's config (default or one "
            "specified by '--remote') in the target repository (only applicable "
            "when downloading from DVC remote)."
        ),
    )
    # The CLI dispatcher instantiates this command class for `artifacts get`.
    get_parser.set_defaults(func=CmdArtifactsGet)
23 changes: 22 additions & 1 deletion dvc/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Exceptions raised by the dvc."""
import errno
from typing import TYPE_CHECKING, Dict, List, Set
from typing import TYPE_CHECKING, Dict, List, Optional, Set

from dvc.utils import format_link

Expand Down Expand Up @@ -336,3 +336,24 @@ def __init__(self, fs_paths):
class PrettyDvcException(DvcException):
def __pretty_exc__(self, **kwargs):
"""Print prettier exception message."""


class ArtifactNotFoundError(DvcException):
    """Thrown if an artifact is not found in the DVC repo.

    The constructor arguments are kept as same-named attributes. The error
    message names the artifact and, when one was requested, the stage
    (preferred) or the version.

    Args:
        name (str): artifact name.
        version (str, optional): requested artifact version, if any.
        stage (str, optional): requested artifact stage, if any.
    """

    def __init__(
        self,
        name: str,
        version: Optional[str] = None,
        stage: Optional[str] = None,
    ):
        self.name = name
        self.version = version
        self.stage = stage

        # Stage takes precedence over version in the message.
        qualifier = stage or version
        suffix = f" @ {qualifier}" if qualifier else ""
        super().__init__(f"Unable to find artifact '{name}{suffix}'")
8 changes: 6 additions & 2 deletions dvc/fs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@
# pylint: enable=unused-import


def download(fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None):
def download(
fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
) -> int:
with Callback.as_tqdm_callback(
desc=f"Downloading {fs.path.name(fs_path)}",
unit="files",
Expand All @@ -63,7 +65,8 @@ def download(fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
if not path.endswith(fs.path.flavour.sep)
]
if not from_infos:
return localfs.makedirs(to, exist_ok=True)
localfs.makedirs(to, exist_ok=True)
return 0
to_infos = [
localfs.path.join(to, *fs.path.relparts(info, fs_path))
for info in from_infos
Expand All @@ -82,6 +85,7 @@ def download(fs: "FileSystem", fs_path: str, to: str, jobs: Optional[int] = None
callback=cb,
batch_size=jobs,
)
return len(to_infos)


def parse_external_url(url, config=None):
Expand Down
Loading