Skip to content

Commit

Permalink
Cleanup URL parsing mechanisms
Browse files Browse the repository at this point in the history
  • Loading branch information
u10313335 committed Jun 27, 2024
1 parent 32a5341 commit 4ad04ea
Showing 1 changed file with 16 additions and 11 deletions.
27 changes: 16 additions & 11 deletions binderhub/repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,24 +471,29 @@ def __init__(self, *args, **kwargs):
async def get_resolved_ref(self):
parsed_repo = urlparse(self.repo)

url_parts_1 = parsed_repo.path.split("/history/")
url_parts_2 = url_parts_1[0].split("/")
if url_parts_2[-2] == "dataset":
self.dataset_id = url_parts_2[-1]
else:
if "/dataset/" not in parsed_repo.path:
# Not actually a dataset
return None

api_url_path = "/api/3/action/"
# CKAN may be under a URL prefix, and we should accommodate that
url_prefix, dataset_url = parsed_repo.path.split("/dataset/")

dataset_url_parts = dataset_url.split("/")
self.dataset_id = dataset_url_parts[0]

api = parsed_repo._replace(
path="/".join(url_parts_2[:-2]) + api_url_path, query=""
path=f"{url_prefix}/api/3/action/", query=""
).geturl()

# handle the activities
# Activity ID may be present either as a query parameter, activity_id
# or as part of the URL, under `/history/<activity-id>`. If `/history/`
# is present, that takes precedence over `activity_id`
activity_id = None
if parse_qs(parsed_repo.query).get("activity_id") is not None:
if "history" in dataset_url_parts:
activity_id = dataset_url_parts[dataset_url_parts.index("history") + 1]
elif parse_qs(parsed_repo.query).get("activity_id") is not None:
activity_id = parse_qs(parsed_repo.query).get("activity_id")[0]
if len(url_parts_1) == 2:
activity_id = url_parts_1[-1]

if activity_id:
fetch_url = f"{api}activity_data_show?" + urlencode(
{"id": activity_id, "object_type": "package"}
Expand Down

0 comments on commit 4ad04ea

Please sign in to comment.