From 3f134c17fbd63660e4076e1f68ae83451869be64 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 9 Sep 2024 16:52:42 -0700 Subject: [PATCH 1/2] fix(worker): Prefer the s3-PUBLIC remote when found --- services/datalad/datalad_service/common/annex.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/services/datalad/datalad_service/common/annex.py b/services/datalad/datalad_service/common/annex.py index 709160afe..05c221af6 100644 --- a/services/datalad/datalad_service/common/annex.py +++ b/services/datalad/datalad_service/common/annex.py @@ -93,7 +93,7 @@ def parse_remote_line(remoteLine): if remoteConfig['type'] == 'S3' and remoteConfig['bucket'] in S3_BUCKETS_WHITELIST: remoteUuid = remoteLine[0:36] remoteUrl = remoteConfig['publicurl'] if 'publicurl' in remoteConfig else None - return {'uuid': remoteUuid, 'url': remoteUrl} + return {'uuid': remoteUuid, 'url': remoteUrl, 'name': remoteConfig['name']} def parse_rmet_line(remote, rmetLine): @@ -174,6 +174,10 @@ def get_repo_urls(path, files): matched_remote = parse_remote_line(line) if matched_remote: remote = matched_remote + # Prefer the s3-PUBLIC remote if we find one + # TODO - Identify the best remote dynamically + if matched_remote['name'] == 's3-PUBLIC': + break # Check if we found a useful external remote if remote: # Read the rest of the files. From 5622b6f04637388da702c38ef55a0fa063f1d4f5 Mon Sep 17 00:00:00 2001 From: Nell Hardcastle Date: Mon, 9 Sep 2024 17:01:42 -0700 Subject: [PATCH 2/2] tests(worker): Update test_parse_remote_line with new format including name --- services/datalad/tests/test_annex.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/datalad/tests/test_annex.py b/services/datalad/tests/test_annex.py index 57e2dbf6f..f0e2bbec6 100644 --- a/services/datalad/tests/test_annex.py +++ b/services/datalad/tests/test_annex.py @@ -108,7 +108,8 @@ def test_compute_rmet_sha256_annex(): def test_parse_remote_line(): remote = parse_remote_line("""57894849-d0c8-4c62-8418-3627be18a196 autoenable=true bucket=openneuro.org datacenter=US encryption=none exporttree=yes fileprefix=ds002778/ host=s3.amazonaws.com name=s3-PUBLIC partsize=1GiB port=80 public=yes publicurl=http://openneuro.org.s3.amazonaws.com/ storageclass=STANDARD type=S3 versioning=yes timestamp=1588743361.538097946s""") assert remote == {'url': 'http://openneuro.org.s3.amazonaws.com/', - 'uuid': '57894849-d0c8-4c62-8418-3627be18a196'} + 'uuid': '57894849-d0c8-4c62-8418-3627be18a196', + 'name': 's3-PUBLIC'} def test_parse_rmet_line():