From 19784976d80668f3d32769869239f0a7c0217c1c Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Thu, 12 Sep 2024 09:09:01 -0400
Subject: [PATCH] Fixes and tests for data fetch models.

---
 client/src/api/schema/schema.ts          |  74 +++++++++++++
 lib/galaxy/schema/fetch_data.py          |  11 ++
 lib/galaxy/tools/data_fetch.py           |   4 +
 lib/galaxy_test/api/test_tools_upload.py |  61 +++++++++++
 test/unit/app/tools/test_data_fetch.py   | 132 +++++++++++++++++++++++
 5 files changed, 282 insertions(+)

diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts
index a3a790978166..e42c9e0e9642 100644
--- a/client/src/api/schema/schema.ts
+++ b/client/src/api/schema/schema.ts
@@ -6330,6 +6330,12 @@ export interface components {
         CompositeDataElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -6360,6 +6366,8 @@ export interface components {
              */
             ext: string;
             extra_files?: components["schemas"]["ExtraFiles"] | null;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Name */
@@ -8668,10 +8676,26 @@ export interface components {
         } & {
             [key: string]: unknown;
         };
+        /** FetchDatasetHash */
+        FetchDatasetHash: {
+            /**
+             * Hash Function
+             * @enum {string}
+             */
+            hash_function: "MD5" | "SHA-1" | "SHA-256" | "SHA-512";
+            /** Hash Value */
+            hash_value: string;
+        };
         /** FileDataElement */
         FileDataElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -8701,6 +8725,8 @@ export interface components {
              */
             ext: string;
             extra_files?: components["schemas"]["ExtraFiles"] | null;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Name */
@@ -8966,6 +8992,12 @@ export interface components {
         FtpImportElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -8997,6 +9029,8 @@ export interface components {
             extra_files?: components["schemas"]["ExtraFiles"] | null;
             /** Ftp Path */
             ftp_path: string;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Name */
@@ -13374,6 +13408,12 @@ export interface components {
         NestedElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -13416,6 +13456,8 @@ export interface components {
              */
             ext: string;
             extra_files?: components["schemas"]["ExtraFiles"] | null;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Name */
@@ -14018,6 +14060,12 @@ export interface components {
         PastedDataElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -14047,6 +14095,8 @@ export interface components {
              */
             ext: string;
             extra_files?: components["schemas"]["ExtraFiles"] | null;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Name */
@@ -14078,6 +14128,12 @@ export interface components {
         PathDataElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -14107,6 +14163,8 @@ export interface components {
              */
             ext: string;
             extra_files?: components["schemas"]["ExtraFiles"] | null;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Link Data Only */
@@ -14799,6 +14857,12 @@ export interface components {
         ServerDirElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -14828,6 +14892,8 @@ export interface components {
              */
             ext: string;
             extra_files?: components["schemas"]["ExtraFiles"] | null;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Link Data Only */
@@ -16443,6 +16509,12 @@ export interface components {
         UrlDataElement: {
             /** Md5 */
             MD5?: string | null;
+            /** Sha-1 */
+            "SHA-1"?: string | null;
+            /** Sha-256 */
+            "SHA-256"?: string | null;
+            /** Sha-512 */
+            "SHA-512"?: string | null;
             /**
              * Auto Decompress
              * @description Decompress compressed data before sniffing?
@@ -16472,6 +16544,8 @@ export interface components {
              */
             ext: string;
             extra_files?: components["schemas"]["ExtraFiles"] | null;
+            /** Hashes */
+            hashes?: components["schemas"]["FetchDatasetHash"][] | null;
             /** Info */
             info?: string | null;
             /** Name */
diff --git a/lib/galaxy/schema/fetch_data.py b/lib/galaxy/schema/fetch_data.py
index 2603b1471ea5..b415ffebf64e 100644
--- a/lib/galaxy/schema/fetch_data.py
+++ b/lib/galaxy/schema/fetch_data.py
@@ -101,6 +101,13 @@ class ExtraFiles(FetchBaseModel):
     )
 
 
+class FetchDatasetHash(Model):
+    hash_function: Literal["MD5", "SHA-1", "SHA-256", "SHA-512"]
+    hash_value: str
+
+    model_config = ConfigDict(extra="forbid")
+
+
 class BaseDataElement(FetchBaseModel):
     name: Optional[CoercedStringType] = None
     dbkey: str = Field("?")
@@ -116,6 +123,10 @@ class BaseDataElement(FetchBaseModel):
     items_from: Optional[ElementsFromType] = Field(None, alias="elements_from")
     collection_type: Optional[str] = None
     MD5: Optional[str] = None
+    SHA1: Optional[str] = Field(None, alias="SHA-1")
+    SHA256: Optional[str] = Field(None, alias="SHA-256")
+    SHA512: Optional[str] = Field(None, alias="SHA-512")
+    hashes: Optional[List[FetchDatasetHash]] = None
     description: Optional[str] = None
 
     model_config = ConfigDict(extra="forbid")
diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py
index a6786c725dc9..540baacc2d91 100644
--- a/lib/galaxy/tools/data_fetch.py
+++ b/lib/galaxy/tools/data_fetch.py
@@ -250,6 +250,10 @@ def _resolve_item_with_primary(item):
         if url:
             sources.append(source_dict)
         hashes = item.get("hashes", [])
+        for hash_function in HASH_NAMES:
+            hash_value = item.get(hash_function)
+            if hash_value:
+                hashes.append({"hash_function": hash_function, "hash_value": hash_value})
         for hash_dict in hashes:
             hash_function = hash_dict.get("hash_function")
             hash_value = hash_dict.get("hash_value")
diff --git a/lib/galaxy_test/api/test_tools_upload.py b/lib/galaxy_test/api/test_tools_upload.py
index 64d7e97365b9..ed32bc92caf0 100644
--- a/lib/galaxy_test/api/test_tools_upload.py
+++ b/lib/galaxy_test/api/test_tools_upload.py
@@ -1,6 +1,7 @@
 import json
 import os
 import urllib.parse
+from base64 import b64encode
 
 import pytest
 from tusclient import client
@@ -25,6 +26,9 @@
 )
 
 from ._framework import ApiTestCase
 
+B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8")
+URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"
+
 class TestToolsUpload(ApiTestCase):
     dataset_populator: DatasetPopulator
@@ -927,6 +931,63 @@ def test_upload_and_validate_valid(self):
         terminal_validated_state = self.dataset_populator.validate_dataset_and_wait(history_id, dataset_id)
         assert terminal_validated_state == "ok", terminal_validated_state
 
+    def test_upload_and_validate_hash_valid(self):
+        with self.dataset_populator.test_history() as history_id:
+            destination = {"type": "hdas"}
+            targets = [
+                {
+                    "destination": destination,
+                    "items": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {"hash_function": "SHA-1", "hash_value": "65e9d53484d28eef5447bc06fe2d754d1090975a"}
+                            ],
+                        },
+                    ],
+                }
+            ]
+            payload = {
+                "history_id": history_id,
+                "targets": targets,
+                "validate_hashes": True,
+            }
+            fetch_response = self.dataset_populator.fetch(payload)
+            self._assert_status_code_is(fetch_response, 200)
+            # history ok implies the dataset upload worked
+            self.dataset_populator.wait_for_history(history_id, assert_ok=True)
+
+    def test_upload_and_validate_hash_invalid(self):
+        with self.dataset_populator.test_history() as history_id:
+            destination = {"type": "hdas"}
+            targets = [
+                {
+                    "destination": destination,
+                    "items": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [{"hash_function": "SHA-1", "hash_value": "invalidhash"}],
+                        },
+                    ],
+                }
+            ]
+            payload = {
+                "history_id": history_id,
+                "targets": targets,
+                "validate_hashes": True,
+            }
+            fetch_response = self.dataset_populator.fetch(payload, assert_ok=True, wait=False)
+            self._assert_status_code_is(fetch_response, 200)
+            outputs = fetch_response.json()["outputs"]
+            new_dataset = outputs[0]
+            self.dataset_populator.wait_for_history(history_id, assert_ok=False)
+            dataset_details = self.dataset_populator.get_history_dataset_details(
+                history_id, dataset=new_dataset, assert_ok=False
+            )
+            assert dataset_details["state"] == "error"
+
     def _velvet_upload(self, history_id, extra_inputs):
         payload = self.dataset_populator.upload_payload(
             history_id,
diff --git a/test/unit/app/tools/test_data_fetch.py b/test/unit/app/tools/test_data_fetch.py
index 58e13e5d1549..ee7cdd61536f 100644
--- a/test/unit/app/tools/test_data_fetch.py
+++ b/test/unit/app/tools/test_data_fetch.py
@@ -1,6 +1,7 @@
 import json
 import os
 import tempfile
+from base64 import b64encode
 from contextlib import contextmanager
 from shutil import rmtree
 from tempfile import mkdtemp
@@ -8,6 +9,9 @@
 from galaxy.tools.data_fetch import main
 from galaxy.util.unittest_utils import skip_if_github_down
 
+B64_FOR_1_2_3 = b64encode(b"1 2 3").decode("utf-8")
+URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"
+
 
 def test_simple_path_get():
     with _execute_context() as execute_context:
@@ -55,6 +59,134 @@ def test_simple_uri_get():
         assert hda_result["ext"] == "bed"
 
 
+def test_correct_md5():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {
+                                    "hash_function": "MD5",
+                                    "hash_value": "5ba48b6e5a7c4d4930fda256f411e55b",
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+            "validate_hashes": True,
+        }
+        execute_context.execute_request(request)
+        output = _unnamed_output(execute_context)
+        hda_result = output["elements"][0]
+        assert hda_result["state"] == "ok"
+        assert hda_result["ext"] == "txt"
+
+
+def test_incorrect_md5():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {
+                                    "hash_function": "MD5",
+                                    "hash_value": "thisisbad",
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+            "validate_hashes": True,
+        }
+        execute_context.execute_request(request)
+        output = _unnamed_output(execute_context)
+        hda_result = output["elements"][0]
+        assert (
+            hda_result["error_message"]
+            == "Failed to validate upload with [MD5] - expected [thisisbad] got [5ba48b6e5a7c4d4930fda256f411e55b]"
+        )
+
+
+def test_correct_sha1():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
+                            "hashes": [
+                                {
+                                    "hash_function": "SHA-1",
+                                    "hash_value": "65e9d53484d28eef5447bc06fe2d754d1090975a",
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+            "validate_hashes": True,
+        }
+        execute_context.execute_request(request)
+        output = _unnamed_output(execute_context)
+        hda_result = output["elements"][0]
+        assert hda_result["state"] == "ok"
+        assert hda_result["ext"] == "txt"
+
+
+def test_incorrect_sha1():
+    with _execute_context() as execute_context:
+        request = {
+            "targets": [
+                {
+                    "destination": {
+                        "type": "hdas",
+                    },
+                    "elements": [
+                        {
+                            "src": "url",
+                            "url": URI_FOR_1_2_3,
"hashes": [ + { + "hash_function": "SHA-1", + "hash_value": "thisisbad", + } + ], + } + ], + } + ], + "validate_hashes": True, + } + execute_context.execute_request(request) + output = _unnamed_output(execute_context) + hda_result = output["elements"][0] + assert ( + hda_result["error_message"] + == "Failed to validate upload with [SHA-1] - expected [thisisbad] got [65e9d53484d28eef5447bc06fe2d754d1090975a]" + ) + + @skip_if_github_down def test_deferred_uri_get(): with _execute_context() as execute_context: