From 5a24427620f668dd7d5507010b9e5867e71a8115 Mon Sep 17 00:00:00 2001 From: Eli Chadwick Date: Wed, 26 Jun 2024 14:29:40 +0100 Subject: [PATCH 1/3] add check that creator identifiers are URIs --- planemo/workflow_lint.py | 13 +++ ...linted-author-identifier-best-practices.ga | 104 ++++++++++++++++++ tests/test_cmd_workflow_lint.py | 12 ++ 3 files changed, 129 insertions(+) create mode 100644 tests/data/wf19-unlinted-author-identifier-best-practices.ga diff --git a/planemo/workflow_lint.py b/planemo/workflow_lint.py index fdc2bbc80..73560f263 100644 --- a/planemo/workflow_lint.py +++ b/planemo/workflow_lint.py @@ -14,6 +14,7 @@ TYPE_CHECKING, Union, ) +from urllib.parse import urlparse import requests import yaml @@ -212,6 +213,18 @@ def check_json_for_untyped_params(j): # creator if not len(workflow_dict.get("creator", [])) > 0: lint_context.warn("Workflow does not specify a creator.") + else: + creators = workflow_dict.get("creator") + if type(creators) != list: + creators = [creators] + for creator in creators: + if creator.get("class", "").lower() == "person" and "identifier" in creator: + identifier = creator["identifier"] + parsed_url = urlparse(identifier) + if not parsed_url.scheme: + lint_context.warn( + f'Creator identifier "{identifier}" should be a fully qualified URI, for example "https://orcid.org/0000-0002-1825-0097".' + ) # license if not workflow_dict.get("license"): diff --git a/tests/data/wf19-unlinted-author-identifier-best-practices.ga b/tests/data/wf19-unlinted-author-identifier-best-practices.ga new file mode 100644 index 000000000..b3883f45c --- /dev/null +++ b/tests/data/wf19-unlinted-author-identifier-best-practices.ga @@ -0,0 +1,104 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "bp (imported from uploaded file)", + "creator": [ + { + "class": "Person", + "identifier": "0000-0002-1825-0097", + "name": "Josiah Carberry", + "url": "https://orcid.org/0000-0002-1825-0097" + } + ], + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [], + "label": null, + "name": "Input dataset", + "outputs": [], + "position": { + "bottom": 730.3166656494141, + "height": 82.55000305175781, + "left": 1155, + "right": 1355, + "top": 647.7666625976562, + "width": 200, + "x": 1155, + "y": 647.7666625976562 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "4219d43a-e182-49c0-bee3-8422e6344911", + "workflow_outputs": [] + }, + "1": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3", + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "runtime parameter for tool Replace Text", + "name": "infile" + } + ], + "label": null, + "name": "Replace Text", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "bottom": 744.8333282470703, + "height": 114.06666564941406, + "left": 1465, + "right": 1665, + "top": 630.7666625976562, + "width": 200, + "x": 1465, + "y": 630.7666625976562 + }, + "post_job_actions": { + "TagDatasetActionoutfile": { + "action_arguments": { + "tags": "${report_name}" + }, + "action_type": "TagDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.3", + "tool_shed_repository": { + "changeset_revision": "ddf54b12c295", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"infile\": {\"__class__\": \"RuntimeValue\"}, \"replacements\": [{\"__index__\": 0, \"column\": \"1\", \"find_pattern\": \"${report_name}\", \"replace_pattern\": \"\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.3", + "type": "tool", + "uuid": "e429e21f-f01e-4efb-b665-56f454cbe38b", + "workflow_outputs": [ + { + "label": null, + "output_name": "outfile", + "uuid": "e4066fe7-c9a2-43af-a9f5-30a75a5b2107" + } + ] + } + }, + "tags": [], + "uuid": "8c13f49c-c0c2-4f3a-85d7-5bad6be1776f", + "version": 2 +} diff --git a/tests/test_cmd_workflow_lint.py b/tests/test_cmd_workflow_lint.py index 02d38c2e7..3736732d8 100644 --- a/tests/test_cmd_workflow_lint.py +++ b/tests/test_cmd_workflow_lint.py @@ -171,6 +171,18 @@ def test_best_practices_linting_ga(self): for warning in warnings: assert warning in result.output + def test_author_identifier_best_practices_linting_ga(self): + workflow_path = "/".join((TEST_DATA_DIR, "wf19-unlinted-author-identifier-best-practices.ga")) + lint_cmd = ["workflow_lint", workflow_path] + result = self._runner.invoke(self._cli.planemo, lint_cmd) + + warnings = [ + 'Creator identifier "0000-0002-1825-0097" should be a fully qualified URI, for example "https://orcid.org/0000-0002-1825-0097".', + ] + + for warning in warnings: + assert warning in result.output + def test_assertion_linting(self): workflow_path = "/".join((TEST_DATA_DIR, "wf15-test-assertions.yml")) lint_cmd = ["workflow_lint", workflow_path] From ee7c30b9c572009797c3221ee640449bf644dd36 Mon Sep 17 00:00:00 2001 From: Eli Chadwick Date: Wed, 26 Jun 2024 14:36:00 +0100 Subject: [PATCH 2/3] use isinstance for type checking --- planemo/workflow_lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planemo/workflow_lint.py b/planemo/workflow_lint.py index 73560f263..57b5f6805 100644 --- a/planemo/workflow_lint.py +++ b/planemo/workflow_lint.py @@ -215,7 +215,7 @@ def check_json_for_untyped_params(j): lint_context.warn("Workflow does not specify a creator.") else: creators = workflow_dict.get("creator") - if type(creators) != list: + if not isinstance(creators, list): creators = [creators] for creator in creators: if creator.get("class", "").lower() == "person" and "identifier" in creator: From 918d23b8ea5c92dd7859b00eeb9554cd908143b5 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 4 Jul 2024 12:15:26 +0200 Subject: [PATCH 3/3] Minor tweaks to workflow author linting --- planemo/workflow_lint.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/planemo/workflow_lint.py b/planemo/workflow_lint.py index 57b5f6805..3b91b935b 100644 --- a/planemo/workflow_lint.py +++ b/planemo/workflow_lint.py @@ -211,11 +211,13 @@ def check_json_for_untyped_params(j): lint_context.warn("Workflow is not annotated.") # creator - if not len(workflow_dict.get("creator", [])) > 0: + creators = workflow_dict.get("creator", []) + if not len(creators) > 0: lint_context.warn("Workflow does not specify a creator.") else: - creators = workflow_dict.get("creator") if not isinstance(creators, list): + # Don't know if this can happen, if we implement schema validation on the Galaxy side + # this won't be needed. creators = [creators] for creator in creators: if creator.get("class", "").lower() == "person" and "identifier" in creator: