From 5f83c222cf96b52bb3ea541ba21a7f479f40ddcf Mon Sep 17 00:00:00 2001
From: Thomas Sibley <tsibley@fredhutch.org>
Date: Wed, 17 Jan 2024 14:53:21 -0800
Subject: [PATCH] remote.nextstrain_dot_org: Support downloading of core
 datasets which aren't in the manifest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows downloading of datasets like

    https://nextstrain.org/enterovirus/d68/vp1/2020-01-23
    https://nextstrain.org/nextclade/sars-cov-2/21L

and others, as reasonably expected.¹  With one more minor tweak to
follow, it will also allow downloading of past snapshots of resources
(e.g. /zika@2023-01-01).

Switches from an assert on expected media type to a conditional
UserError, supported by the new SubResource.__str__() method, since for
single-resource downloads we no longer have the assurance of knowing
that the resource exists already.

¹ <https://bedfordlab.slack.com/archives/C01LCTT7JNN/p1667970567194279>
---
 nextstrain/cli/remote/nextstrain_dot_org.py | 32 +++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/nextstrain/cli/remote/nextstrain_dot_org.py b/nextstrain/cli/remote/nextstrain_dot_org.py
index 3ba7bd32..b7a0ae4c 100644
--- a/nextstrain/cli/remote/nextstrain_dot_org.py
+++ b/nextstrain/cli/remote/nextstrain_dot_org.py
@@ -132,6 +132,21 @@ class SubResource(NamedTuple):
     file_extension: str
     primary: bool = False
 
+    def __str__(self) -> str:
+        type, subtype = self.media_type.split("/", 1)
+        subtype_sans_suffix, *_ = subtype.split("+", 1)
+        subtype_tree = tuple(subtype_sans_suffix.split("."))
+
+        resource = (
+            "dataset"   if subtype_tree[0:3] == ("vnd", "nextstrain", "dataset")   else
+            "narrative" if subtype_tree[0:3] == ("vnd", "nextstrain", "narrative") else
+            self.media_type
+        )
+
+        sidecar = sidecar_suffix(self.media_type)
+
+        return f"{resource} ({sidecar})" if sidecar else resource
+
 
 class Dataset(Resource):
     """
@@ -327,7 +342,18 @@ def download(url: URL, local_path: Path, recursively: bool = False, dry_run: boo
     with requests.Session() as http:
         http.auth = auth(origin)
 
-        resources = _ls(origin, path, recursively = recursively, http = http)
+        if recursively:
+            resources = _ls(origin, path, recursively = recursively, http = http)
+        else:
+            # Avoid the query and just try to download the single resource.
+            # This saves a request for single-dataset (or narrative) downloads,
+            # but also allows downloading core datasets which aren't in the
+            # manifest.  (At least until the manifest goes away.)
+            #   -trs, 9 Nov 2022
+            if narratives_only(path):
+                resources = [Narrative(str(path))]
+            else:
+                resources = [Dataset(str(path))]
 
         if not resources:
             raise UserError(f"Path {path} does not seem to exist")
@@ -352,7 +378,9 @@ def download(url: URL, local_path: Path, recursively: bool = False, dry_run: boo
 
                     # Check for bad response
                     raise_for_status(response)
-                    assert content_media_type(response) == subresource.media_type
+
+                    if content_media_type(response) != subresource.media_type:
+                        raise UserError(f"Path {path} does not seem to be a {subresource}.")
 
                     # Local destination
                     if local_path.is_dir():