Cleanup URL parsing mechanisms

jupyterhub · Jun 27, 2024 · 4ad04ea · 4ad04ea
1 parent 32a5341
commit 4ad04ea
Showing 1 changed file with 16 additions and 11 deletions.
diff --git a/binderhub/repoproviders.py b/binderhub/repoproviders.py
@@ -471,24 +471,29 @@ def __init__(self, *args, **kwargs):
     async def get_resolved_ref(self):
         parsed_repo = urlparse(self.repo)
 
-        url_parts_1 = parsed_repo.path.split("/history/")
-        url_parts_2 = url_parts_1[0].split("/")
-        if url_parts_2[-2] == "dataset":
-            self.dataset_id = url_parts_2[-1]
-        else:
+        if "/dataset/" not in parsed_repo.path:
+            # Not actually a dataset
             return None
 
-        api_url_path = "/api/3/action/"
+        # CKAN may be under a URL prefix, and we should accommodate that
+        url_prefix, dataset_url = parsed_repo.path.split("/dataset/")
+
+        dataset_url_parts = dataset_url.split("/")
+        self.dataset_id = dataset_url_parts[0]
+
         api = parsed_repo._replace(
-            path="/".join(url_parts_2[:-2]) + api_url_path, query=""
+            path=f"{url_prefix}/api/3/action/", query=""
         ).geturl()
 
-        # handle the activites
+        # Activity ID may be present either as a query parameter, activity_id
+        # or as part of the URL, under `/history/<activity-id>`. If `/history/`
+        # is present, that takes precedence over `activity_id`
         activity_id = None
-        if parse_qs(parsed_repo.query).get("activity_id") is not None:
+        if "history" in dataset_url_parts:
+            activity_id = dataset_url_parts[dataset_url_parts.index("history") + 1]
+        elif parse_qs(parsed_repo.query).get("activity_id") is not None:
             activity_id = parse_qs(parsed_repo.query).get("activity_id")[0]
-        if len(url_parts_1) == 2:
-            activity_id = url_parts_1[-1]
+
         if activity_id:
             fetch_url = f"{api}activity_data_show?" + urlencode(
                 {"id": activity_id, "object_type": "package"}