class CKANProvider(RepoProvider):
    """Provide contents of a CKAN dataset.

    Users must provide a spec consisting of the (URL-quoted) CKAN dataset URL.

    Accepted URL shapes, where ``<prefix>`` may be empty:

    - ``https://host<prefix>/dataset/<dataset_id>``
    - ``https://host<prefix>/dataset/<dataset_id>/history/<activity_id>``
    - ``https://host<prefix>/dataset/<dataset_id>?activity_id=<activity_id>``

    The resolved ref has the form ``<dataset_id>.v<timestamp>``, where
    ``<timestamp>`` is the ``metadata_modified`` value reported by the CKAN
    API, truncated to whole seconds since the epoch.
    """

    name = Unicode("CKAN")

    display_name = "CKAN dataset"

    labels = {
        "text": "CKAN dataset URL (https://demo.ckan.org/dataset/sample-dataset-1)",
        "tag_text": "Git ref (branch, tag, or commit)",
        "ref_prop_disabled": True,
        "label_prop_disabled": True,
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The spec arrives URL-quoted; keep the plain dataset URL around.
        self.repo = urllib.parse.unquote(self.spec)

    @staticmethod
    def _find_activity_id(dataset_url_parts, query):
        """Return the activity ID named by the URL, or None if there is none.

        ``dataset_url_parts`` are the path segments following ``/dataset/``
        (the first one being the dataset ID) and ``query`` is the raw query
        string. A ``/history/<activity_id>`` path segment takes precedence
        over an ``activity_id`` query parameter.
        """
        # Skip the dataset ID itself so that a dataset which happens to be
        # called "history" is not mistaken for the history marker.
        trailing_parts = dataset_url_parts[1:]
        if "history" in trailing_parts:
            after_history = trailing_parts[trailing_parts.index("history") + 1 :]
            if after_history:
                return after_history[0]
        # parse_qs omits blank values, so a present key has at least one item
        return parse_qs(query).get("activity_id", [None])[0]

    @staticmethod
    def _version_from_timestamp(date):
        """Convert an ISO 8601 timestamp string into whole epoch seconds (str).

        Timestamps without timezone information are interpreted as UTC.
        """
        # fromisoformat accepts the value both with and without fractional
        # seconds, unlike a fixed "%S.%f" strptime format.
        parsed_date = datetime.fromisoformat(date)
        if parsed_date.tzinfo is None:
            parsed_date = parsed_date.replace(tzinfo=timezone.utc)
        # truncate the timestamp
        return str(int(parsed_date.timestamp()))

    async def get_resolved_ref(self):
        """Resolve the dataset URL to a ``<dataset_id>.v<timestamp>`` ref.

        Queries the CKAN action API (``activity_data_show`` when an activity
        ID is present in the URL, ``package_show`` otherwise) and stores the
        result in ``self.record_id``. ``self.dataset_id`` is set as a side
        effect once a dataset ID has been extracted from the URL.

        Returns:
            The resolved ref, or ``None`` when the URL path has no
            ``/dataset/`` segment, names no dataset, or the API request
            fails with an ``HTTPError``.
        """
        parsed_repo = urlparse(self.repo)

        # CKAN may be under a URL prefix, and we should accommodate that.
        # Only the first "/dataset/" splits prefix from dataset path, so
        # further occurrences later in the path cannot break the parsing.
        url_prefix, separator, dataset_url = parsed_repo.path.partition("/dataset/")
        if not separator:
            # Not actually a dataset
            return None

        dataset_url_parts = dataset_url.split("/")
        if not dataset_url_parts[0]:
            # "/dataset/" with nothing after it does not name a dataset
            return None
        self.dataset_id = dataset_url_parts[0]

        # Drop params, query and fragment so that nothing from the dataset
        # URL ends up glued into the API endpoint URL.
        api = parsed_repo._replace(
            path=f"{url_prefix}/api/3/action/", params="", query="", fragment=""
        ).geturl()

        activity_id = self._find_activity_id(dataset_url_parts, parsed_repo.query)

        if activity_id:
            fetch_url = f"{api}activity_data_show?" + urlencode(
                {"id": activity_id, "object_type": "package"}
            )
        else:
            fetch_url = f"{api}package_show?" + urlencode({"id": self.dataset_id})

        client = AsyncHTTPClient()
        try:
            r = await client.fetch(fetch_url, user_agent="BinderHub")
        except HTTPError:
            return None

        json_response = json.loads(r.body)
        dataset_version = self._version_from_timestamp(
            json_response["result"]["metadata_modified"]
        )

        self.record_id = f"{self.dataset_id}.v{dataset_version}"

        return self.record_id

    async def get_resolved_spec(self):
        """Return the unquoted dataset URL.

        Triggers ``get_resolved_ref`` first if no ``record_id`` has been
        stored on this instance yet.
        """
        if not hasattr(self, "record_id"):
            await self.get_resolved_ref()
        return self.repo

    def get_repo_url(self):
        """Return the unquoted dataset URL given in the spec."""
        return self.repo

    async def get_resolved_ref_url(self):
        """Return the URL for the resolved ref (same as the resolved spec)."""
        resolved_spec = await self.get_resolved_spec()
        return resolved_spec

    def get_build_slug(self):
        """Return the build slug, ``ckan-<dataset_id>``.

        ``dataset_id`` is only assigned by ``get_resolved_ref``, so that
        method must have extracted a dataset ID before this one is called.
        """
        return f"ckan-{self.dataset_id}"
@pytest.mark.parametrize(
    "spec,resolved_spec,resolved_ref,resolved_ref_url,build_slug",
    [
        [
            "https://demo.ckan.org/dataset/sample-dataset-1",
            "https://demo.ckan.org/dataset/sample-dataset-1",
            "sample-dataset-1.v",
            "https://demo.ckan.org/dataset/sample-dataset-1",
            "ckan-sample-dataset-1",
        ],
        [
            "https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f",
            "https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f",
            "chart-test.v1717501747",
            "https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f",
            "ckan-chart-test",
        ],
        [
            "https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f",
            "https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f",
            "chart-test.v1717501747",
            "https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f",
            "ckan-chart-test",
        ],
        ["https://demo.ckan.org/group/roger", None, None, None, None],
        ["https://demo.ckan.org/dataset/nosuchdataset", None, None, None, None],
    ],
)
async def test_ckan(spec, resolved_spec, resolved_ref, resolved_ref_url, build_slug):
    """Check CKANProvider resolution for dataset, activity and invalid URLs.

    Rows whose expected ``resolved_ref`` is ``None`` describe URLs that must
    not resolve; for all other rows the expected ref is matched as a
    substring, since the unpinned dataset's timestamp changes over time.
    """
    provider = CKANProvider(spec=spec)

    ref = await provider.get_resolved_ref()
    if not resolved_ref:
        # We are done here if we don't expect to resolve, but make sure the
        # provider really reported the failure instead of passing vacuously.
        assert ref is None
        return
    assert resolved_ref in ref

    slug = provider.get_build_slug()
    assert slug == build_slug
    repo_url = provider.get_repo_url()
    assert repo_url == spec
    ref_url = await provider.get_resolved_ref_url()
    assert ref_url == resolved_ref_url
    # Use a separate name so the parametrized ``spec`` is not shadowed
    actual_spec = await provider.get_resolved_spec()
    assert actual_spec == resolved_spec
their prefixes and specs are: +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ | Dataverse | ``dataverse`` | ```` | `Dataverse `_ is open source research data repository software installed all over the world. | +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ + | CKAN | ``ckan`` | ``/`` | `CKAN `_ is an open source data management system. | + +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ | Git | ``git`` | ``/`` | A generic repository provider for URLs that point directly to a git repository. | +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/source/reference/repoproviders.rst b/docs/source/reference/repoproviders.rst index d0f5ca37c..40b230d70 100644 --- a/docs/source/reference/repoproviders.rst +++ b/docs/source/reference/repoproviders.rst @@ -65,6 +65,11 @@ Module: :mod:`binderhub.repoproviders` .. autoconfigurable:: DataverseProvider :members: +:class:`CKANProvider` +--------------------------- + +.. autoconfigurable:: CKANProvider + :members: :class:`GitRepoProvider` ---------------------------