Skip to content

Commit

Permalink
remote.nextstrain_dot_org: Support downloading of core datasets which…
Browse files Browse the repository at this point in the history
… aren't in the manifest

This allows downloading of datasets like

    https://nextstrain.org/enterovirus/d68/vp1/2020-01-23
    https://nextstrain.org/nextclade/sars-cov-2/21L

and others, as reasonably expected.¹  It will also, with one more minor
tweak to follow, allow downloading of past snapshots of resources (e.g.
/zika@2023-01-01).

Switches from an assert on expected media type to a conditional
UserError, supported by the new Resource.__str__() method, since for
single resource downloads we no longer have the assurance of knowing it
exists already.

¹ <https://bedfordlab.slack.com/archives/C01LCTT7JNN/p1667970567194279>
  • Loading branch information
tsibley committed Jan 17, 2024
1 parent 1dfe185 commit 5f83c22
Showing 1 changed file with 30 additions and 2 deletions.
32 changes: 30 additions & 2 deletions nextstrain/cli/remote/nextstrain_dot_org.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,21 @@ class SubResource(NamedTuple):
file_extension: str
primary: bool = False

def __str__(self) -> str:
    """
    Return a short, human-readable label for this resource.

    The label is ``dataset`` or ``narrative`` when the subtype of
    ``self.media_type`` (ignoring anything from the first ``+`` onwards)
    begins with ``vnd.nextstrain.dataset`` or ``vnd.nextstrain.narrative``
    respectively; otherwise the label is the media type itself.  If
    ``sidecar_suffix(self.media_type)`` returns a truthy value, that value
    is appended in parentheses.
    """
    # Only the subtype is needed for classification.  partition() (rather
    # than unpacking a split()) means a media type without a "/" falls
    # through to the raw-media-type label instead of raising ValueError,
    # and avoids binding an unused local that shadows the builtin `type`.
    _, _, subtype = self.media_type.partition("/")

    # Discard the "+suffix" part, if any, then break the dotted subtype
    # into its components for prefix comparison.
    subtype_sans_suffix, _, _ = subtype.partition("+")
    subtype_tree = tuple(subtype_sans_suffix.split("."))
    prefix = subtype_tree[:3]

    if prefix == ("vnd", "nextstrain", "dataset"):
        resource = "dataset"
    elif prefix == ("vnd", "nextstrain", "narrative"):
        resource = "narrative"
    else:
        # Unrecognized media type: show it verbatim.
        resource = self.media_type

    # NOTE(review): sidecar_suffix() is defined elsewhere in this module;
    # presumably it names the sidecar kind for sidecar media types and is
    # falsy for the main resource — confirm against its definition.
    sidecar = sidecar_suffix(self.media_type)

    return f"{resource} ({sidecar})" if sidecar else resource


class Dataset(Resource):
"""
Expand Down Expand Up @@ -327,7 +342,18 @@ def download(url: URL, local_path: Path, recursively: bool = False, dry_run: boo
with requests.Session() as http:
http.auth = auth(origin)

resources = _ls(origin, path, recursively = recursively, http = http)
if recursively:
resources = _ls(origin, path, recursively = recursively, http = http)
else:
# Avoid the query and just try to download the single resource.
# This saves a request for single-dataset (or narrative) downloads,
# but also allows downloading core datasets which aren't in the
# manifest. (At least until the manifest goes away.)
# -trs, 9 Nov 2022
if narratives_only(path):
resources = [Narrative(str(path))]
else:
resources = [Dataset(str(path))]

if not resources:
raise UserError(f"Path {path} does not seem to exist")
Expand All @@ -352,7 +378,9 @@ def download(url: URL, local_path: Path, recursively: bool = False, dry_run: boo

# Check for bad response
raise_for_status(response)
assert content_media_type(response) == subresource.media_type

if content_media_type(response) != subresource.media_type:
raise UserError(f"Path {path} does not seem to be a {subresource}.")

# Local destination
if local_path.is_dir():
Expand Down

0 comments on commit 5f83c22

Please sign in to comment.