From 88b4c9b1e561dda0a005656a5c51973a9e75efed Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Wed, 1 Sep 2021 14:47:06 -0400 Subject: [PATCH 01/10] Support invoking `registry-submit` with neither `--sibling` nor `--url` --- datalad_registry_client/opts.py | 20 +++++--- datalad_registry_client/submit.py | 52 ++++++++++---------- datalad_registry_client/tests/test_submit.py | 32 ++++++++++-- 3 files changed, 68 insertions(+), 36 deletions(-) diff --git a/datalad_registry_client/opts.py b/datalad_registry_client/opts.py index 5f87ced5..486bca02 100644 --- a/datalad_registry_client/opts.py +++ b/datalad_registry_client/opts.py @@ -3,7 +3,9 @@ from typing import Any from typing import Dict +from typing import List from typing import Optional +from typing import Tuple from datalad.distribution.dataset import Dataset from datalad.distribution.dataset import EnsureDataset @@ -65,17 +67,23 @@ def process_args( if sibling not in remotes: raise ValueError("Unknown sibling: {}".format(sibling)) - if not url: - if not sibling: - raise ValueError( - "Must specify URL to use when sibling isn't given") + urls: List[Tuple[str, str]] = [] + if url: + urls.append((url, url_encode(url))) + elif sibling: url = repo.config.get("remote.{}.url".format(sibling)) - if not url: + if url is None: raise ValueError("Could not find URL for {}".format(sibling)) + urls.append((url, url_encode(url))) + else: + for r in repo.get_remotes(): + u = repo.get_remote_url(r) + assert u is not None + urls.append((u, url_encode(u))) endpoint = endpoint or repo.config.get( "datalad_registry.endpoint", DEFAULT_ENDPOINT) return dict(ds=ds, ds_id=ds_id, - sibling=sibling, url=url, url_encoded=url_encode(url), + sibling=sibling, urls=urls, endpoint=endpoint) diff --git a/datalad_registry_client/submit.py b/datalad_registry_client/submit.py index 3ce0ef8e..9aa4f724 100644 --- a/datalad_registry_client/submit.py +++ b/datalad_registry_client/submit.py @@ -37,36 +37,36 @@ def __call__( options = 
opts.process_args( dataset=dataset, sibling=sibling, url=url, endpoint=endpoint) ds_id = options["ds_id"] - url = options["url"] - url_encoded = options['url_encoded'] + urls = options["urls"] res_base = get_status_dict(action="registry-submit", logger=lgr, **options) base_url = f"{options['endpoint']}/datasets" - try: - r_url = requests.get( - f"{base_url}/{ds_id}/urls/{url_encoded}", - timeout=1) - r_url.raise_for_status() - except requests.exceptions.RequestException as exc: - yield dict(res_base, status="error", - message=("Check if URL is known failed: %s", exc)) - return - url_info = r_url.json() - if url_info.get("status") == "unknown": - msg = "Registered URL" - else: - msg = "Announced update" + for url, url_encoded in urls: + try: + r_url = requests.get( + f"{base_url}/{ds_id}/urls/{url_encoded}", + timeout=1) + r_url.raise_for_status() + except requests.exceptions.RequestException as exc: + yield dict(res_base, status="error", + message=("Check if URL is known failed: %s", exc)) + return + url_info = r_url.json() + if url_info.get("status") == "unknown": + msg = "Registered URL" + else: + msg = "Announced update" - try: - r_patch = requests.patch(f"{base_url}/{ds_id}/urls/{url_encoded}", - timeout=1) - r_patch.raise_for_status() - except requests.exceptions.RequestException as exc: - yield dict(res_base, status="error", - message=("Submitting URL failed: %s", exc)) - return - yield dict(res_base, status="ok", - message=("%s: %s", msg, url)) + try: + r_patch = requests.patch(f"{base_url}/{ds_id}/urls/{url_encoded}", + timeout=1) + r_patch.raise_for_status() + except requests.exceptions.RequestException as exc: + yield dict(res_base, status="error", + message=("Submitting URL failed: %s", exc)) + return + yield dict(res_base, status="ok", + message=("%s: %s", msg, url)) diff --git a/datalad_registry_client/tests/test_submit.py b/datalad_registry_client/tests/test_submit.py index 6216f2b5..ad6fbfbd 100644 --- a/datalad_registry_client/tests/test_submit.py 
+++ b/datalad_registry_client/tests/test_submit.py @@ -29,10 +29,6 @@ def test_submit_via_local(tmp_path): assert requests.get(query_url).json()["status"] == "unknown" - # If sibling is not specified, URL is required. - with pytest.raises(ValueError): - ds.registry_submit() - assert_in_results( ds.registry_submit(url=ds.path), action="registry-submit", type="dataset", @@ -82,6 +78,34 @@ def test_submit_via_sibling(tmp_path): assert requests.get(query_url).json()["status"] != "unknown" +@pytest.mark.devserver +@pytest.mark.slow +def test_submit_all_siblings(tmp_path): + ds_sib = dl.Dataset(tmp_path / "sib").create() + ds = dl.clone(ds_sib.path, str(tmp_path / "clone")) + + url2 = "https://www.example.nil/repo.git" + ds.config.set("remote.sibling2.url", url2, where="local") + + ds_id = ds.id + + query_urls = [ + f"{ENDPOINT}/datasets/{ds_id}/urls/{url_encode(u)}" + for u in [ds_sib.path, url2] + ] + + for qu in query_urls: + assert requests.get(qu).json()["status"] == "unknown" + + assert_in_results( + ds.registry_submit(), + action="registry-submit", type="dataset", + path=ds.path, status="ok") + + for qu in query_urls: + assert requests.get(qu).json()["status"] != "unknown" + + @pytest.mark.devserver @pytest.mark.slow def test_submit_explicit_endpoint(tmp_path): From eef2ec9fa164f2a3a365b047de11be79229a7150 Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Wed, 1 Sep 2021 16:42:04 -0400 Subject: [PATCH 02/10] Update datalad_registry_client/opts.py Co-authored-by: Yaroslav Halchenko --- datalad_registry_client/opts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_registry_client/opts.py b/datalad_registry_client/opts.py index 486bca02..d08d4315 100644 --- a/datalad_registry_client/opts.py +++ b/datalad_registry_client/opts.py @@ -76,7 +76,7 @@ def process_args( raise ValueError("Could not find URL for {}".format(sibling)) urls.append((url, url_encode(url))) else: - for r in repo.get_remotes(): + for r in repo.get_remotes(with_urls_only=True): u = repo.get_remote_url(r) assert u is not None urls.append((u, url_encode(u))) From 69e3a2bbd91d2d002a14b174ba8d6fa9d8f03baa Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 2 Sep 2021 14:42:37 -0400 Subject: [PATCH 03/10] Update datalad_registry_client/opts.py Co-authored-by: Yaroslav Halchenko --- datalad_registry_client/opts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datalad_registry_client/opts.py b/datalad_registry_client/opts.py index d08d4315..6f0a7d6e 100644 --- a/datalad_registry_client/opts.py +++ b/datalad_registry_client/opts.py @@ -77,9 +77,9 @@ def process_args( urls.append((url, url_encode(url))) else: for r in repo.get_remotes(with_urls_only=True): - u = repo.get_remote_url(r) - assert u is not None - urls.append((u, url_encode(u))) + url = repo.get_remote_url(r) + assert url is not None + urls.append((url, url_encode(url))) endpoint = endpoint or repo.config.get( "datalad_registry.endpoint", From 7d3a072167ebaa296a23563ba2eea7d72b3ff8b1 Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Thu, 2 Sep 2021 14:45:02 -0400 Subject: [PATCH 04/10] Collect plain URLs in process_args() and URL-encode them in the return value --- datalad_registry_client/opts.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datalad_registry_client/opts.py b/datalad_registry_client/opts.py index 6f0a7d6e..2fa5c6f2 100644 --- a/datalad_registry_client/opts.py +++ b/datalad_registry_client/opts.py @@ -67,23 +67,23 @@ def process_args( if sibling not in remotes: raise ValueError("Unknown sibling: {}".format(sibling)) - urls: List[Tuple[str, str]] = [] + urls: List[str] = [] if url: - urls.append((url, url_encode(url))) + urls.append(url) elif sibling: url = repo.config.get("remote.{}.url".format(sibling)) if url is None: raise ValueError("Could not find URL for {}".format(sibling)) - urls.append((url, url_encode(url))) + urls.append(url) else: for r in repo.get_remotes(with_urls_only=True): url = repo.get_remote_url(r) assert url is not None - urls.append((url, url_encode(url))) + urls.append(url) endpoint = endpoint or repo.config.get( "datalad_registry.endpoint", DEFAULT_ENDPOINT) return dict(ds=ds, ds_id=ds_id, - sibling=sibling, urls=urls, + sibling=sibling, urls=[(url, url_encode(url)) for url in urls], endpoint=endpoint) From 9b3e4e095567f69b03072eaf64bf5c799b5b8d57 Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Thu, 2 Sep 2021 15:59:39 -0400 Subject: [PATCH 05/10] Support rerunning tests without resetting devserver --- datalad_registry_client/tests/test_submit.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datalad_registry_client/tests/test_submit.py b/datalad_registry_client/tests/test_submit.py index ad6fbfbd..137421bf 100644 --- a/datalad_registry_client/tests/test_submit.py +++ b/datalad_registry_client/tests/test_submit.py @@ -1,3 +1,5 @@ +import os +import time import subprocess as sp import datalad.api as dl @@ -84,7 +86,9 @@ def test_submit_all_siblings(tmp_path): ds_sib = dl.Dataset(tmp_path / "sib").create() ds = dl.clone(ds_sib.path, str(tmp_path / "clone")) - url2 = "https://www.example.nil/repo.git" + pid = os.getpid() + ts = time.time() + url2 = f"https://www.example.nil/{pid}/{ts}/repo.git" ds.config.set("remote.sibling2.url", url2, where="local") ds_id = ds.id From 83a6b8e6dc8c8baa8151a0f6778b6675407bc4dd Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Fri, 3 Sep 2021 10:09:16 -0400 Subject: [PATCH 06/10] Move URL-encoding out of opts.process_args() --- datalad_registry_client/opts.py | 3 +-- datalad_registry_client/submit.py | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/datalad_registry_client/opts.py b/datalad_registry_client/opts.py index 2fa5c6f2..c9500dff 100644 --- a/datalad_registry_client/opts.py +++ b/datalad_registry_client/opts.py @@ -14,7 +14,6 @@ from datalad.support.constraints import EnsureStr from datalad.support.param import Parameter -from datalad_registry.utils import url_encode from datalad_registry_client.consts import DEFAULT_ENDPOINT common_params = dict( @@ -85,5 +84,5 @@ def process_args( "datalad_registry.endpoint", DEFAULT_ENDPOINT) return dict(ds=ds, ds_id=ds_id, - sibling=sibling, urls=[(url, url_encode(url)) for url in urls], + sibling=sibling, urls=urls, endpoint=endpoint) diff --git a/datalad_registry_client/submit.py b/datalad_registry_client/submit.py index 9aa4f724..e96636d3 100644 --- a/datalad_registry_client/submit.py +++ b/datalad_registry_client/submit.py @@ -12,8 +12,10 @@ from datalad.interface.results import get_status_dict from datalad.interface.utils import eval_results +from datalad_registry.utils import url_encode from datalad_registry_client import opts + lgr = logging.getLogger("datalad.registry.submit") @@ -44,7 +46,8 @@ def __call__( base_url = f"{options['endpoint']}/datasets" - for url, url_encoded in urls: + for url in urls: + url_encoded = url_encode(url) try: r_url = requests.get( f"{base_url}/{ds_id}/urls/{url_encoded}", From 093003ba0d0dc29002d9a864345c33a610c40d64 Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Fri, 3 Sep 2021 10:18:16 -0400 Subject: [PATCH 07/10] Fix typing --- datalad_registry_client/submit.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datalad_registry_client/submit.py b/datalad_registry_client/submit.py index e96636d3..b33eaed2 100644 --- a/datalad_registry_client/submit.py +++ b/datalad_registry_client/submit.py @@ -46,8 +46,8 @@ def __call__( base_url = f"{options['endpoint']}/datasets" - for url in urls: - url_encoded = url_encode(url) + for u in urls: + url_encoded = url_encode(u) try: r_url = requests.get( f"{base_url}/{ds_id}/urls/{url_encoded}", @@ -72,4 +72,4 @@ def __call__( message=("Submitting URL failed: %s", exc)) return yield dict(res_base, status="ok", - message=("%s: %s", msg, url)) + message=("%s: %s", msg, u)) From c5a74f57f7dd0b25ea0b30e011b6a774f9ebcb07 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Fri, 3 Sep 2021 10:24:45 -0400 Subject: [PATCH 08/10] Make registry-submit's result dicts contain a singular `url` key instead of `urls` --- datalad_registry_client/submit.py | 8 +++---- datalad_registry_client/tests/test_submit.py | 25 ++++++++++---------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/datalad_registry_client/submit.py b/datalad_registry_client/submit.py index b33eaed2..cf5d1404 100644 --- a/datalad_registry_client/submit.py +++ b/datalad_registry_client/submit.py @@ -39,7 +39,7 @@ def __call__( options = opts.process_args( dataset=dataset, sibling=sibling, url=url, endpoint=endpoint) ds_id = options["ds_id"] - urls = options["urls"] + urls = options.pop("urls") # Don't include in res_base res_base = get_status_dict(action="registry-submit", logger=lgr, **options) @@ -54,7 +54,7 @@ def __call__( timeout=1) r_url.raise_for_status() except requests.exceptions.RequestException as exc: - yield dict(res_base, status="error", + yield dict(res_base, status="error", url=u, message=("Check if URL is known failed: %s", exc)) return url_info = r_url.json() @@ 
-68,8 +68,8 @@ def __call__( timeout=1) r_patch.raise_for_status() except requests.exceptions.RequestException as exc: - yield dict(res_base, status="error", + yield dict(res_base, status="error", url=u, message=("Submitting URL failed: %s", exc)) return - yield dict(res_base, status="ok", + yield dict(res_base, status="ok", url=u, message=("%s: %s", msg, u)) diff --git a/datalad_registry_client/tests/test_submit.py b/datalad_registry_client/tests/test_submit.py index 137421bf..d1f27577 100644 --- a/datalad_registry_client/tests/test_submit.py +++ b/datalad_registry_client/tests/test_submit.py @@ -34,7 +34,7 @@ def test_submit_via_local(tmp_path): assert_in_results( ds.registry_submit(url=ds.path), action="registry-submit", type="dataset", - path=ds.path, status="ok") + path=ds.path, url=ds.path, status="ok") assert requests.get(query_url).json()["status"] != "unknown" @@ -43,7 +43,7 @@ def test_submit_via_local(tmp_path): assert_in_results( res, action="registry-submit", type="dataset", - path=ds.path, status="ok") + path=ds.path, url=ds.path, status="ok") @pytest.mark.slow @@ -75,7 +75,7 @@ def test_submit_via_sibling(tmp_path): assert_in_results( ds.registry_submit(sibling="origin"), action="registry-submit", type="dataset", - path=ds.path, status="ok") + path=ds.path, url=ds_sib.path, status="ok") assert requests.get(query_url).json()["status"] != "unknown" @@ -93,18 +93,17 @@ def test_submit_all_siblings(tmp_path): ds_id = ds.id - query_urls = [ - f"{ENDPOINT}/datasets/{ds_id}/urls/{url_encode(u)}" - for u in [ds_sib.path, url2] - ] + urls = [ds_sib.path, url2] + query_urls = [f"{ENDPOINT}/datasets/{ds_id}/urls/{url_encode(u)}" for u in urls] for qu in query_urls: assert requests.get(qu).json()["status"] == "unknown" - assert_in_results( - ds.registry_submit(), - action="registry-submit", type="dataset", - path=ds.path, status="ok") + for u in urls: + assert_in_results( + ds.registry_submit(), + action="registry-submit", type="dataset", + path=ds.path, url=u, 
status="ok") for qu in query_urls: assert requests.get(qu).json()["status"] != "unknown" @@ -120,7 +119,7 @@ def test_submit_explicit_endpoint(tmp_path): assert_in_results( ds.registry_submit(url=ds.path, endpoint="abc", on_failure="ignore"), action="registry-submit", type="dataset", - path=ds.path, status="error") + path=ds.path, url=ds.path, status="error") # Valid, explicit. url_encoded = url_encode(ds.path) @@ -129,6 +128,6 @@ def test_submit_explicit_endpoint(tmp_path): assert_in_results( ds.registry_submit(url=ds.path, endpoint=DEFAULT_ENDPOINT), action="registry-submit", type="dataset", - path=ds.path, status="ok") + path=ds.path, url=ds.path, status="ok") assert requests.get(query_url).json()["status"] != "unknown" From e611f1335d8c2e32b04c222d23eebe5e3f09f13d Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Tue, 7 Sep 2021 11:35:01 -0400 Subject: [PATCH 09/10] Include url_encoded in result dicts --- datalad_registry_client/submit.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datalad_registry_client/submit.py b/datalad_registry_client/submit.py index cf5d1404..972fdc71 100644 --- a/datalad_registry_client/submit.py +++ b/datalad_registry_client/submit.py @@ -55,6 +55,7 @@ def __call__( r_url.raise_for_status() except requests.exceptions.RequestException as exc: yield dict(res_base, status="error", url=u, + url_encoded=url_encoded, message=("Check if URL is known failed: %s", exc)) return url_info = r_url.json() @@ -69,7 +70,8 @@ def __call__( r_patch.raise_for_status() except requests.exceptions.RequestException as exc: yield dict(res_base, status="error", url=u, + url_encoded=url_encoded, message=("Submitting URL failed: %s", exc)) return - yield dict(res_base, status="ok", url=u, + yield dict(res_base, status="ok", url=u, url_encoded=url_encoded, message=("%s: %s", msg, u)) From 9df6d300d9901aa7a1b9849df5c27d995e6e9771 Mon Sep 17 00:00:00 2001 From: "John T. 
Wodder II" Date: Tue, 7 Sep 2021 15:46:52 -0400 Subject: [PATCH 10/10] Use DEFAULT_ENDPOINT in test_submit_all_siblings --- datalad_registry_client/tests/test_submit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_registry_client/tests/test_submit.py b/datalad_registry_client/tests/test_submit.py index d1f27577..a2b7d859 100644 --- a/datalad_registry_client/tests/test_submit.py +++ b/datalad_registry_client/tests/test_submit.py @@ -94,7 +94,7 @@ def test_submit_all_siblings(tmp_path): ds_id = ds.id urls = [ds_sib.path, url2] - query_urls = [f"{ENDPOINT}/datasets/{ds_id}/urls/{url_encode(u)}" for u in urls] + query_urls = [f"{DEFAULT_ENDPOINT}/datasets/{ds_id}/urls/{url_encode(u)}" for u in urls] for qu in query_urls: assert requests.get(qu).json()["status"] == "unknown"