From a924bdd56689752ea5f79430c0548101e5c1a76b Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 29 Jul 2024 11:47:22 +0200 Subject: [PATCH] fix linter errors and restore lint_urls for lint_tool_dependencies_urls --- planemo/linters/biocontainer_registered.py | 7 ++-- planemo/linters/conda_requirements.py | 7 ++-- planemo/linters/urls.py | 2 +- planemo/shed_lint.py | 42 ++++++++++++++++++++-- 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/planemo/linters/biocontainer_registered.py b/planemo/linters/biocontainer_registered.py index 01ac56349..ee14f86d8 100644 --- a/planemo/linters/biocontainer_registered.py +++ b/planemo/linters/biocontainer_registered.py @@ -1,6 +1,7 @@ """Ensure best-practice biocontainer registered for this tool.""" from typing import ( + List, Optional, TYPE_CHECKING, ) @@ -42,10 +43,12 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): name = mulled_container_name("biocontainers", targets) if not name: requirements_node = xml_node_from_toolsource(tool_source, "requirements") - lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements) + lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements_node) -def mulled_container_name(namespace: str, targets: List[CondaTarget]) -> Optional[str]: +def mulled_container_name(namespace: str, targets: List["CondaTarget"]) -> Optional[str]: name = targets_to_mulled_name(targets=targets, hash_func="v2", namespace=namespace) if name: return f"quay.io/{namespace}/{name}" + else: + return None diff --git a/planemo/linters/conda_requirements.py b/planemo/linters/conda_requirements.py index 3f13d7bd8..9fd99402a 100644 --- a/planemo/linters/conda_requirements.py +++ b/planemo/linters/conda_requirements.py @@ -8,13 +8,14 @@ from galaxy.tool_util.deps.conda_util import requirement_to_conda_targets from galaxy.tool_util.lint import Linter -from .util import xml_node_from_toolsource from planemo.conda import ( BEST_PRACTICE_CHANNELS, best_practice_search, ) +from .util import xml_node_from_toolsource if TYPE_CHECKING: + from galaxy.tool_util.deps.conda_util import CondaTarget from galaxy.tool_util.lint import LintContext from galaxy.tool_util.parser.interface import ToolSource @@ -32,7 +33,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): if best_hit and exact: message = f"Requirement [{conda_target_str}] matches target in best practice Conda channel [{best_hit.get('channel')}]." requirements_node = xml_node_from_toolsource(tool_source, "requirements") - lint_ctx.info(message, linter=cls.name(), node=requirements_nodes) + lint_ctx.info(message, linter=cls.name(), node=requirements_node) class CondaRequirementInexact(Linter): @@ -63,7 +64,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): lint_ctx.warn(message, linter=cls.name(), node=requirements_node) -def _requirements_conda_targets(tool_source: "ToolSource") -> Generator[CondaTarget]: +def _requirements_conda_targets(tool_source: "ToolSource") -> Generator["CondaTarget"]: requirements, *_ = tool_source.parse_requirements_and_containers() for requirement in requirements: conda_target = requirement_to_conda_targets(requirement) diff --git a/planemo/linters/urls.py b/planemo/linters/urls.py index bf374a463..4d3cca5a7 100644 --- a/planemo/linters/urls.py +++ b/planemo/linters/urls.py @@ -2,10 +2,10 @@ """ from typing import TYPE_CHECKING +from urllib.request import urlopen import requests from galaxy.tool_util.lint import Linter -from urllib.request import urlopen from planemo.shed import _find_urls_in_text diff --git a/planemo/shed_lint.py b/planemo/shed_lint.py index 62d2ecdcc..b68d981f2 100644 --- a/planemo/shed_lint.py +++ b/planemo/shed_lint.py @@ -3,7 +3,9 @@ import os import xml.etree.ElementTree as ET from typing import TYPE_CHECKING +from urllib.request import urlopen +import requests import yaml from galaxy.tool_util.lint import lint_tool_source_with from galaxy.tool_util.linters.help import rst_invalid @@ -17,6 +19,7 @@ ) from planemo.shed import ( CURRENT_CATEGORIES, + find_urls_for_xml, REPO_TYPE_SUITE, REPO_TYPE_TOOL_DEP, REPO_TYPE_UNRESTRICTED, @@ -188,8 +191,43 @@ def lint_readme(realized_repository, lint_ctx): def lint_tool_dependencies_urls(realized_repository, lint_ctx): - - + + def lint_urls(root, lint_ctx): + """Find referenced URLs and verify they are valid. + + note this function was used previously for tools (URLs in help) and tool dependency files + the former has been rewritten and therefore the function has been moved here + """ + urls, _ = find_urls_for_xml(root) + for url in urls: + is_valid = True + if url.startswith("http://") or url.startswith("https://"): + headers = None + r = None + try: + r = requests.get(url, headers=headers, stream=True) + r.raise_for_status() + next(r.iter_content(1000)) + except Exception as e: + if r is not None and r.status_code == 429: + # too many requests + pass + if r is not None and r.status_code in [403, 503] and "cloudflare" in r.text: + # CloudFlare protection block + pass + else: + is_valid = False + lint_ctx.error(f"Error '{e}' accessing {url}") + else: + try: + with urlopen(url) as handle: + handle.read(100) + except Exception as e: + is_valid = False + lint_ctx.error(f"Error '{e}' accessing {url}") + if is_valid: + lint_ctx.info("URL OK %s" % url) + path = realized_repository.real_path tool_dependencies = os.path.join(path, "tool_dependencies.xml") if not os.path.exists(tool_dependencies):