From 09962eb2bd4bbd601766e43d6b5eac1a1d69caf5 Mon Sep 17 00:00:00 2001 From: jaimergp Date: Mon, 4 Nov 2024 20:40:57 +0100 Subject: [PATCH] Add ruff and pre-commit linting (#15) --- .devcontainer/pixi/Dockerfile | 2 +- .devcontainer/pixi/devcontainer.json | 2 +- .../streamlit-cloud/devcontainer.json | 2 +- .pre-commit-config.yaml | 30 ++ .streamlit/config.toml | 1 - app_config.schema.json | 2 +- conda_metadata_app/app.py | 12 +- conda_metadata_app/app_config.py | 62 +++- conda_metadata_app/pages/main_page.py | 287 +++++++++++------- .../pages/search_by_file_path_page.py | 10 +- conda_metadata_app/version_order.py | 9 +- docs/configuration.md | 8 +- pyproject.toml | 31 ++ 13 files changed, 312 insertions(+), 146 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.devcontainer/pixi/Dockerfile b/.devcontainer/pixi/Dockerfile index 9c00309..edaf385 100644 --- a/.devcontainer/pixi/Dockerfile +++ b/.devcontainer/pixi/Dockerfile @@ -10,4 +10,4 @@ RUN curl -L -o /usr/local/bin/pixi -fsSL --compressed "https://github.com/prefix USER vscode WORKDIR /home/vscode -RUN echo 'eval "$(pixi completion -s bash)"' >> /home/vscode/.bashrc \ No newline at end of file +RUN echo 'eval "$(pixi completion -s bash)"' >> /home/vscode/.bashrc diff --git a/.devcontainer/pixi/devcontainer.json b/.devcontainer/pixi/devcontainer.json index 858488b..7442f61 100644 --- a/.devcontainer/pixi/devcontainer.json +++ b/.devcontainer/pixi/devcontainer.json @@ -30,4 +30,4 @@ "forwardPorts": [ 8501 ] -} \ No newline at end of file +} diff --git a/.devcontainer/streamlit-cloud/devcontainer.json b/.devcontainer/streamlit-cloud/devcontainer.json index 19a4b7d..1aeaab3 100644 --- a/.devcontainer/streamlit-cloud/devcontainer.json +++ b/.devcontainer/streamlit-cloud/devcontainer.json @@ -31,4 +31,4 @@ "forwardPorts": [ 8501 ] -} \ No newline at end of file +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..49addff --- /dev/null +++ 
b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +# disable autofixing PRs, commenting "pre-commit.ci autofix" on a pull request triggers a autofix +ci: + autofix_prs: false +repos: + # generic verification and formatting + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + # standard end of line/end of file cleanup + - id: mixed-line-ending + - id: end-of-file-fixer + - id: trailing-whitespace + # ensure syntaxes are valid + - id: check-toml + - id: check-yaml + # catch git merge/rebase problems + - id: check-merge-conflict + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.7.1 + hooks: + # lint & attempt to correct failures (e.g. pyupgrade) + - id: ruff + args: [--fix] + # compatible replacement for black + - id: ruff-format + - repo: meta + # see https://pre-commit.com/#meta-hooks + hooks: + - id: check-hooks-apply + - id: check-useless-excludes diff --git a/.streamlit/config.toml b/.streamlit/config.toml index 2fe9feb..8a46f15 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -3,4 +3,3 @@ gatherUsageStats = false [logger] level = "debug" - diff --git a/app_config.schema.json b/app_config.schema.json index e14548c..e4e7987 100644 --- a/app_config.schema.json +++ b/app_config.schema.json @@ -362,4 +362,4 @@ ], "title": "AppConfig", "type": "object" -} \ No newline at end of file +} diff --git a/conda_metadata_app/app.py b/conda_metadata_app/app.py index 016f6fa..e12787e 100644 --- a/conda_metadata_app/app.py +++ b/conda_metadata_app/app.py @@ -1,15 +1,15 @@ -from pathlib import Path - -import streamlit as st - -from conda_metadata_app.app_config import AppConfig - """ If deploying a streamlit app as a Python module, we cannot use the automatic pages/ subpages. Instead, we need to define the pages manually. 
""" +from pathlib import Path + +import streamlit as st + +from conda_metadata_app.app_config import AppConfig + pages_dir = Path(__file__).parent / "pages" pages = [ diff --git a/conda_metadata_app/app_config.py b/conda_metadata_app/app_config.py index 8c9c32a..dcf237a 100644 --- a/conda_metadata_app/app_config.py +++ b/conda_metadata_app/app_config.py @@ -1,12 +1,29 @@ +""" +Pydantic models to generate the schemas of the app_config.toml configuration files +""" + import functools import json import os from collections.abc import Iterable from enum import StrEnum -from typing import Self, Literal - -from pydantic import AnyHttpUrl, BaseModel, field_validator, TypeAdapter, ValidationError, model_validator, ConfigDict -from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource, TomlConfigSettingsSource +from typing import Literal, Self + +from pydantic import ( + AnyHttpUrl, + BaseModel, + ConfigDict, + TypeAdapter, + ValidationError, + field_validator, + model_validator, +) +from pydantic_settings import ( + BaseSettings, + PydanticBaseSettingsSource, + SettingsConfigDict, + TomlConfigSettingsSource, +) class HashableBaseModel(BaseModel): @@ -63,6 +80,7 @@ class PackageFilter(BaseModel): By default, no packages are filtered. By setting allowed_names or allowed_prefixes, only packages that match the criteria will be considered. """ + allowed_names: list[str] = [] """ Whitelist of package names. Only packages with names in this list will be considered. @@ -163,7 +181,9 @@ class Channel(BaseModel): """ How to discover artifacts in the channel, given a package name. """ - arch_subdir_discovery: ArchSubdirDiscoveryChoice | ArchSubdirList = ArchSubdirDiscoveryChoice.CHANNELDATA + arch_subdir_discovery: ArchSubdirDiscoveryChoice | ArchSubdirList = ( + ArchSubdirDiscoveryChoice.CHANNELDATA + ) """ How to discover architecture subdirectories in the channel. Use an ArchSubdirList to specify a list of subdirectories. 
@@ -178,7 +198,7 @@ class Channel(BaseModel): """ Enable this for conda-forge to map package names to feedstock names. This is used for provenance URLs (see below). - + If this is False, the package name is used as the feedstock name. """ provenance_url_pattern: str | None = None @@ -186,7 +206,7 @@ class Channel(BaseModel): A URL pattern to link to the provenance of a package. The URL pattern should contain a `{feedstock}` placeholder for the feedstock (!) name (see map_conda_forge_package_to_feedstock). Each placeholder will be replaced with the feedstock name. - + For conda-forge, this should be https://github.com/conda-forge/{feedstock}-feedstock. A remote URL present in the metadata always takes precedence over this URL pattern. """ @@ -204,7 +224,9 @@ def _validate_provenance_url_pattern(cls, provenance_url_pattern: str | None) -> try: TypeAdapter(AnyHttpUrl).validate_python(replaced_url) except ValidationError: - raise ValueError("provenance_url_pattern must be a valid URL pattern with a {} placeholder.") + raise ValueError( + "provenance_url_pattern must be a valid URL pattern with a {} placeholder." + ) return provenance_url_pattern @@ -220,8 +242,13 @@ def _validate_provenance_url_pattern(cls, provenance_url_pattern: str | None) -> @model_validator(mode="after") def check_supports_broken_label_artifact_discovery(self) -> Self: - if self.supports_broken_label and self.artifact_discovery != ArtifactDiscoveryChoice.ANACONDA_API: - raise ValueError("supports_broken_label is only supported for Anaconda API artifact discovery.") + if ( + self.supports_broken_label + and self.artifact_discovery != ArtifactDiscoveryChoice.ANACONDA_API + ): + raise ValueError( + "supports_broken_label is only supported for Anaconda API artifact discovery." 
+ ) return self dashboards: list[str] = [] @@ -281,7 +308,6 @@ def _check_single_or_no_auth(self) -> Self: return self - @property def rss_url(self) -> str: return f"{self.url}/rss.xml" @@ -296,7 +322,9 @@ def get_repodata_url(self, arch_subdir: str) -> str: def get_zstd_repodata_url(self, arch_subdir: str) -> str: return self.get_repodata_url(arch_subdir) + ".zst" - def get_artifact_download_url(self, arch_subdir: str, package_name: str, version: str, build_string: str, extension: str) -> str: + def get_artifact_download_url( + self, arch_subdir: str, package_name: str, version: str, build_string: str, extension: str + ) -> str: return f"{self.url}/{arch_subdir}/{package_name}-{version}-{build_string}.{extension}" model_config = ConfigDict(extra="forbid", use_attribute_docstrings=True) @@ -306,7 +334,7 @@ class Dashboard(BaseModel): url_pattern: str """ The URL pattern of the dashboard. The URL pattern can contain the following placeholders within curly {} braces: - + - `channel`: The channel name. If the channel name contains a slash, only the second part is used. - `name`: The name of the package. - `version`: The version of the package. 
@@ -335,7 +363,6 @@ def _validate_url_pattern(cls, url_pattern: str) -> str: model_config = ConfigDict(extra="forbid", use_attribute_docstrings=True) - class AppConfig(BaseSettings): channels: dict[str, Channel] """ @@ -372,7 +399,9 @@ def _validate_dashboards(self) -> Self: raise ValueError(f"Dashboard {dashboard_name} is not defined.") return self - model_config = SettingsConfigDict(toml_file="app_config.toml", extra="forbid", use_attribute_docstrings=True) + model_config = SettingsConfigDict( + toml_file="app_config.toml", extra="forbid", use_attribute_docstrings=True + ) @classmethod def settings_customise_sources( @@ -389,7 +418,8 @@ def settings_customise_sources( def export_json_schema() -> None: with open("app_config.schema.json", "w") as f: json.dump(AppConfig.model_json_schema(), f, indent=2) + f.write("\n") -if __name__ == '__main__': +if __name__ == "__main__": export_json_schema() diff --git a/conda_metadata_app/pages/main_page.py b/conda_metadata_app/pages/main_page.py index 80721b9..5b0891b 100644 --- a/conda_metadata_app/pages/main_page.py +++ b/conda_metadata_app/pages/main_page.py @@ -1,23 +1,38 @@ +""" +The main page is the home of the application. + +With no inputs selected in the sidebar, it will list the latest uploads to the channel +(if available). If the user selects an artifact via the sidebar, it will list its metadata. +The metadata can also be listed directly if accessed via a `?q=` URL. 
+""" + import json import os import re import typing -from typing import Any -from collections.abc import Iterable from contextlib import closing from datetime import datetime from difflib import unified_diff from inspect import cleandoc from io import StringIO from tempfile import gettempdir +from typing import Any import zstandard as zstd from conda_forge_metadata.types import ArtifactData from rattler.platform import PlatformLiteral from requests.auth import HTTPBasicAuth -from conda_metadata_app.app_config import (AppConfig, Channel, PackageDiscoveryChoice, ArchSubdirDiscoveryChoice, - ArchSubdirList, ArtifactDiscoveryChoice, MetadataRetrieval, Secret) +from conda_metadata_app.app_config import ( + AppConfig, + ArchSubdirDiscoveryChoice, + ArchSubdirList, + ArtifactDiscoveryChoice, + Channel, + MetadataRetrieval, + PackageDiscoveryChoice, + Secret, +) from conda_metadata_app.version_order import VersionOrder if not os.environ.get("CACHE_DIR"): @@ -27,6 +42,8 @@ gettempdir(), "conda-oci-mirror-cache" ) +from xml.etree import ElementTree as ET + import requests import streamlit as st from conda_forge_metadata.artifact_info.info_json import get_artifact_info_as_json @@ -35,8 +52,9 @@ from conda_package_streaming.url import conda_reader_for_url from ruamel.yaml import YAML from streamlit.logger import get_logger -from xml.etree import ElementTree as ET +if typing.TYPE_CHECKING: + from collections.abc import Iterable yaml = YAML(typ="safe") yaml.allow_duplicate_keys = True @@ -48,6 +66,7 @@ initial_sidebar_state="expanded", ) + def bar_esc(s: str) -> str: "Escape vertical bars in tables" return s.replace("|", "\\|") @@ -93,7 +112,6 @@ def _make_http_session(channel_name: str) -> requests.Session: return session - @st.cache_resource(ttl="15m", max_entries=5) def rss_data(channel_name: str) -> ET.ElementTree | None: """ @@ -134,6 +152,7 @@ def _download_compressed_repodata(channel_name: str, arch_subdir: str) -> dict | with dctx.stream_reader(r.raw) as reader: return 
json.load(reader) + @st.cache_resource(ttl="15m", max_entries=50) def get_repodata(channel_name: str, arch_subdir: str) -> dict: """ @@ -156,7 +175,9 @@ def get_repodata(channel_name: str, arch_subdir: str) -> dict: return r.json() -def get_all_packages_sections_from_repodata(channel_name: str, arch_subdir: str, with_broken: bool) -> dict: +def get_all_packages_sections_from_repodata( + channel_name: str, arch_subdir: str, with_broken: bool +) -> dict: """ Contains the "packages" and "packages.conda" sections of the repodata. @@ -242,10 +263,7 @@ def provenance_urls(package_name: str, channel: str, data: dict | None = None) - else: feedstock_names = [package_name] - return [ - url_pattern.format(feedstock=feedstock_name) - for feedstock_name in feedstock_names - ] + return [url_pattern.format(feedstock=feedstock_name) for feedstock_name in feedstock_names] def get_package_names(channel_name: str) -> list[str]: @@ -263,7 +281,10 @@ def get_package_names(channel_name: str) -> list[str]: for subdir in all_subdirs: all_packages.update( - pkg["name"] for pkg in get_all_packages_sections_from_repodata(channel_name, subdir, with_broken=True).values() + pkg["name"] + for pkg in get_all_packages_sections_from_repodata( + channel_name, subdir, with_broken=True + ).values() ) else: raise RuntimeError("Invalid package discovery choice. This is an implementation error.") @@ -294,7 +315,6 @@ def _discover_arch_subdirs_exhaustively(channel_name: str) -> list[str]: return all_subdirs - def get_all_arch_subdirs(channel_name: str) -> list[str]: """ Get all arch subdirs (e.g., noarch, osx-64, linux-64) of a channel. 
@@ -311,11 +331,14 @@ def get_all_arch_subdirs(channel_name: str) -> list[str]: if isinstance(discovery_choice, ArchSubdirList): return discovery_choice.subdirs - raise RuntimeError(f"Invalid arch subdir discovery choice: {discovery_choice} This is an implementation error.") - + raise RuntimeError( + f"Invalid arch subdir discovery choice: {discovery_choice} This is an implementation error." + ) -def get_arch_subdirs_for_package(package_name: str, channel_name: str, with_broken: bool = False) -> list[str]: +def get_arch_subdirs_for_package( + package_name: str, channel_name: str, with_broken: bool = False +) -> list[str]: """ Get the arch subdirs for a package. The arch subdirs are sorted, ascending. @@ -329,27 +352,33 @@ def get_arch_subdirs_for_package(package_name: str, channel_name: str, with_brok if arch_subdir_discovery_choice == ArchSubdirDiscoveryChoice.CHANNELDATA: all_subdirs = get_channeldata(channel_name)["packages"][package_name]["subdirs"] - elif arch_subdir_discovery_choice == ArchSubdirDiscoveryChoice.ALL or \ - isinstance(arch_subdir_discovery_choice, ArchSubdirList): + elif arch_subdir_discovery_choice == ArchSubdirDiscoveryChoice.ALL or isinstance( + arch_subdir_discovery_choice, ArchSubdirList + ): all_subdirs = get_all_arch_subdirs(channel_name) else: - raise RuntimeError("Invalid arch subdir discovery choice. This is an implementation error.") + raise RuntimeError( + "Invalid arch subdir discovery choice. This is an implementation error." 
+ ) return sorted( - subdir for subdir in all_subdirs + subdir + for subdir in all_subdirs if get_versions(channel_name, subdir, package_name, with_broken=with_broken) ) - -def _best_version_in_subdir(package_name: str, channel_name: str, with_broken: bool = False) \ - -> tuple[str, str] | tuple[None, None]: +def _best_version_in_subdir( + package_name: str, channel_name: str, with_broken: bool = False +) -> tuple[str, str] | tuple[None, None]: if not package_name: return None, None subdirs_plus_best_version = sorted( ( (subdir, get_versions(channel_name, subdir, package_name, with_broken=with_broken)[0]) - for subdir in get_arch_subdirs_for_package(package_name, channel_name, with_broken=with_broken) + for subdir in get_arch_subdirs_for_package( + package_name, channel_name, with_broken=with_broken + ) ), key=lambda x: VersionOrder(x[1]), reverse=True, @@ -359,7 +388,9 @@ def _best_version_in_subdir(package_name: str, channel_name: str, with_broken: b return None, None -def get_versions(channel_name: str, subdir: str, package_name: str, with_broken: bool = False) -> list[str]: +def get_versions( + channel_name: str, subdir: str, package_name: str, with_broken: bool = False +) -> list[str]: """ Get the versions of a package in a channel and subdir. If package_name or subdir are empty, return an empty list. 
@@ -386,11 +417,11 @@ def get_versions(channel_name: str, subdir: str, package_name: str, with_broken: and (with_broken or "broken" not in pkg["labels"]) } elif discovery_choice == ArtifactDiscoveryChoice.REPODATA: - repodata_pkg = get_all_packages_sections_from_repodata(channel_name, subdir, with_broken=with_broken) + repodata_pkg = get_all_packages_sections_from_repodata( + channel_name, subdir, with_broken=with_broken + ) all_versions = { - pkg["version"] - for pkg in repodata_pkg.values() - if pkg["name"] == package_name + pkg["version"] for pkg in repodata_pkg.values() if pkg["name"] == package_name } else: raise RuntimeError("Invalid artifact discovery choice. This is an implementation error.") @@ -402,8 +433,9 @@ def get_versions(channel_name: str, subdir: str, package_name: str, with_broken: ) -def _build_mapping_from_anaconda_api(package_name: str, subdir: str, version: str, channel: str, - with_broken: bool = False) -> dict[str, int]: +def _build_mapping_from_anaconda_api( + package_name: str, subdir: str, version: str, channel: str, with_broken: bool = False +) -> dict[str, int]: """ Returns a mapping from build string to build number. """ @@ -412,13 +444,15 @@ def _build_mapping_from_anaconda_api(package_name: str, subdir: str, version: st pkg["attrs"]["build"]: pkg["attrs"]["build_number"] for pkg in data if pkg["attrs"]["subdir"] == subdir - and pkg["version"] == version - and "main" in pkg["labels"] - and (with_broken or "broken" not in pkg["labels"]) + and pkg["version"] == version + and "main" in pkg["labels"] + and (with_broken or "broken" not in pkg["labels"]) } -def _build_mapping_from_repodata(package_name: str, subdir: str, version: str, channel: str, with_broken: bool) -> dict[str, int]: +def _build_mapping_from_repodata( + package_name: str, subdir: str, version: str, channel: str, with_broken: bool +) -> dict[str, int]: """ Note: This function cannot consider labels as they are not present in the repodata. 
Returns a mapping from build string to build number. @@ -428,12 +462,13 @@ def _build_mapping_from_repodata(package_name: str, subdir: str, version: str, c return { pkg["build"]: pkg["build_number"] for pkg in repodata_packages.values() - if pkg["name"] == package_name - and pkg["version"] == version + if pkg["name"] == package_name and pkg["version"] == version } -def builds(package_name: str, subdir: str, version: str, channel: str, with_broken: bool = False) -> list[str]: +def builds( + package_name: str, subdir: str, version: str, channel: str, with_broken: bool = False +) -> list[str]: if not package_name or not subdir or not version: return [] @@ -441,48 +476,61 @@ def builds(package_name: str, subdir: str, version: str, channel: str, with_brok discovery_choice = get_channel_config(channel).artifact_discovery if discovery_choice == ArtifactDiscoveryChoice.ANACONDA_API: - build_str_to_num = _build_mapping_from_anaconda_api(package_name, subdir, version, channel, with_broken) + build_str_to_num = _build_mapping_from_anaconda_api( + package_name, subdir, version, channel, with_broken + ) elif discovery_choice == ArtifactDiscoveryChoice.REPODATA: - build_str_to_num = _build_mapping_from_repodata(package_name, subdir, version, channel, with_broken) + build_str_to_num = _build_mapping_from_repodata( + package_name, subdir, version, channel, with_broken + ) else: raise RuntimeError("Invalid artifact discovery choice. 
This is an implementation error.") return [ - k - for k, _ in sorted( - build_str_to_num.items(), key=lambda kv: (kv[1], kv[0]), reverse=True - ) + k for k, _ in sorted(build_str_to_num.items(), key=lambda kv: (kv[1], kv[0]), reverse=True) ] -def _extensions_from_anaconda_api(package_name: str, subdir: str, version: str, build: str, channel: str, - with_broken: bool = False) -> set[str]: +def _extensions_from_anaconda_api( + package_name: str, + subdir: str, + version: str, + build: str, + channel: str, + with_broken: bool = False, +) -> set[str]: data = anaconda_api_data(package_name, channel) return { ("conda" if pkg["basename"].endswith(".conda") else "tar.bz2") for pkg in data if pkg["attrs"]["subdir"] == subdir - and pkg["version"] == version - and pkg["attrs"]["build"] == build - and "main" in pkg["labels"] - and (with_broken or "broken" not in pkg["labels"]) + and pkg["version"] == version + and pkg["attrs"]["build"] == build + and "main" in pkg["labels"] + and (with_broken or "broken" not in pkg["labels"]) } -def _extensions_from_repodata(package_name: str, subdir: str, version: str, build: str, channel: str, with_broken: bool) -> set[str]: +def _extensions_from_repodata( + package_name: str, subdir: str, version: str, build: str, channel: str, with_broken: bool +) -> set[str]: repodata_packages = get_all_packages_sections_from_repodata(channel, subdir, with_broken) return { ("conda" if filename.endswith(".conda") else "tar.bz2") for filename, pkg in repodata_packages.items() - if pkg["name"] == package_name - and pkg["version"] == version - and pkg["build"] == build + if pkg["name"] == package_name and pkg["version"] == version and pkg["build"] == build } -def extensions(package_name: str, subdir: str, version: str, build: str, channel: str, - with_broken: bool = False) -> list[str]: +def extensions( + package_name: str, + subdir: str, + version: str, + build: str, + channel: str, + with_broken: bool = False, +) -> list[str]: if not package_name or not 
subdir or not version or not build: return [] if override_extensions := get_channel_config(channel).override_extensions: @@ -491,13 +539,21 @@ def extensions(package_name: str, subdir: str, version: str, build: str, channel discovery_choice = get_channel_config(channel).artifact_discovery if discovery_choice == ArtifactDiscoveryChoice.ANACONDA_API: - return sorted(_extensions_from_anaconda_api(package_name, subdir, version, build, channel, with_broken)) + return sorted( + _extensions_from_anaconda_api( + package_name, subdir, version, build, channel, with_broken + ) + ) if discovery_choice == ArtifactDiscoveryChoice.REPODATA: - return sorted(_extensions_from_repodata(package_name, subdir, version, build, channel, with_broken)) + return sorted( + _extensions_from_repodata(package_name, subdir, version, build, channel, with_broken) + ) raise RuntimeError("Invalid artifact discovery choice. This is an implementation error.") -def _is_broken_anaconda_api(package_name: str, subdir: str, version: str, build: str, extension: str, channel: str) -> bool: +def _is_broken_anaconda_api( + package_name: str, subdir: str, version: str, build: str, extension: str, channel: str +) -> bool: channel_config = get_channel_config(channel) if not channel_config.supports_broken_label: return False @@ -505,16 +561,18 @@ def _is_broken_anaconda_api(package_name: str, subdir: str, version: str, build: data = anaconda_api_data(package_name, channel) for pkg in data: if ( - pkg["attrs"]["subdir"] == subdir - and pkg["version"] == version - and pkg["attrs"]["build"] == build - and pkg["basename"].endswith(extension) + pkg["attrs"]["subdir"] == subdir + and pkg["version"] == version + and pkg["attrs"]["build"] == build + and pkg["basename"].endswith(extension) ): return "broken" in pkg["labels"] return False -def _is_broken_repodata(package_name: str, subdir: str, version: str, build: str, extension: str, channel: str) -> bool: +def _is_broken_repodata( + package_name: str, subdir: str, 
version: str, build: str, extension: str, channel: str +) -> bool: repodata = get_repodata(channel, subdir) artifact_name = f"{package_name}-{version}-{build}.{extension}" @@ -522,7 +580,9 @@ def _is_broken_repodata(package_name: str, subdir: str, version: str, build: str return artifact_name in repodata.get("removed", []) -def _is_broken(package_name: str, subdir: str, version: str, build: str, extension: str, channel: str) -> bool: +def _is_broken( + package_name: str, subdir: str, version: str, build: str, extension: str, channel: str +) -> bool: channel_config = get_channel_config(channel) if channel_config.artifact_discovery == ArtifactDiscoveryChoice.ANACONDA_API: return _is_broken_anaconda_api(package_name, subdir, version, build, extension, channel) @@ -531,7 +591,6 @@ def _is_broken(package_name: str, subdir: str, version: str, build: str, extensi raise RuntimeError("Invalid artifact discovery choice. This is an implementation error.") - def patched_repodata(channel: str, subdir: str, artifact: str) -> tuple[dict, bool]: """ This function assumes that the artifact discovery mode for the channel is "anaconda". 
@@ -571,7 +630,7 @@ def artifact_metadata(channel: str, subdir: str, artifact: str) -> ArtifactData artifact=artifact, backend="streamed", skip_files_suffixes=(), - session=authenticated_session + session=authenticated_session, ) @@ -628,10 +687,10 @@ def parse_url_params() -> tuple[dict[str, Any], bool]: if artifact: if artifact.endswith(".conda"): extension = "conda" - rest_of_artifact = artifact[:-len(".conda")] + rest_of_artifact = artifact[: -len(".conda")] elif artifact.endswith(".tar.bz2"): extension = "tar.bz2" - rest_of_artifact = artifact[:-len(".tar.bz2")] + rest_of_artifact = artifact[: -len(".tar.bz2")] elif artifact.endswith("."): extension = None rest_of_artifact = artifact.rstrip(".") @@ -658,7 +717,7 @@ def parse_url_params() -> tuple[dict[str, Any], bool]: "build": build, "extension": extension, "path": path, - "with_broken": with_broken + "with_broken": with_broken, }, ok @@ -701,8 +760,9 @@ def parse_url_params() -> tuple[dict[str, Any], bool]: for channel in app_config().channels if get_channel_config(channel).repodata_patches_package ] -_with_patches_help_extra = f" Only for {_patched_metadata_channels[0]}." \ - if len(_patched_metadata_channels) == 1 else "" +_with_patches_help_extra = ( + f" Only for {_patched_metadata_channels[0]}." if len(_patched_metadata_channels) == 1 else "" +) _with_patches_help: str if _patched_metadata_channels: _with_patches_help = "Requires extra API calls. Slow!" + _with_patches_help_extra @@ -741,18 +801,24 @@ def parse_url_params() -> tuple[dict[str, Any], bool]: _all_channels, key="channel", # Use the user provided channel (via query params) if possible. 
- index=_all_channels.index(url_params["channel"]) if url_params["channel"] in _all_channels else 0, + index=_all_channels.index(url_params["channel"]) + if url_params["channel"] in _all_channels + else 0, ) - _available_package_names = [""] + get_package_names(channel) # empty string means: show RSS feed + _available_package_names = [""] + get_package_names( + channel + ) # empty string means: show RSS feed package_name = st.selectbox( "Enter a package name:", options=_available_package_names, key="package_name", help=f"Choose one package out of the {len(_available_package_names) - 1:,} available ones. " - "Underscore-leading names are sorted last." + "Underscore-leading names are sorted last.", + ) + _available_subdirs = get_arch_subdirs_for_package( + package_name, channel, with_broken=with_broken ) - _available_subdirs = get_arch_subdirs_for_package(package_name, channel, with_broken=with_broken) _best_subdir, _best_version = _best_version_in_subdir( package_name, channel, with_broken=with_broken ) @@ -823,9 +889,7 @@ def input_value_so_far(): def disable_button(query): - if re.match( - r"^[a-z0-9-]+/[a-z0-9-]+::[a-z0-9-]+-[0-9.]+-[a-z0-9_]+.[a-z0-9]+$", query - ): + if re.match(r"^[a-z0-9-]+/[a-z0-9-]+::[a-z0-9-]+-[0-9.]+-[a-z0-9_]+.[a-z0-9]+$", query): return False if all([channel, subdir, package_name, version, build, extension]): return False @@ -835,13 +899,13 @@ def disable_button(query): c1, c2 = st.columns([1, 0.15]) with c1: query = st.text_input( - label="Search artifact metadata:", - placeholder="channel/subdir::package_name-version-build.ext", - value=input_value_so_far(), - label_visibility="collapsed", - key="query_input", - disabled=True, - ) + label="Search artifact metadata:", + placeholder="channel/subdir::package_name-version-build.ext", + value=input_value_so_far(), + label_visibility="collapsed", + key="query_input", + disabled=True, + ) with c2: submitted = st.button( "Submit", @@ -864,8 +928,12 @@ def disable_button(query): subdir=subdir, 
artifact=artifact, ) - if not data and artifact.endswith(".tar.bz2") and get_channel_config(channel).metadata_retrieval == MetadataRetrieval.STREAMED: - st.warning(f"Cannot retrieve metadata of an tar.bz2 artifact for non-OCI channels.") + if ( + not data + and artifact.endswith(".tar.bz2") + and get_channel_config(channel).metadata_retrieval == MetadataRetrieval.STREAMED + ): + st.warning("Cannot retrieve metadata of an tar.bz2 artifact for non-OCI channels.") st.stop() elif not data: logger.error(f"No metadata found for `{query}`.") @@ -903,8 +971,8 @@ def disable_button(query): else: name = url_text = url.split("/")[-1] if ( - show_archived - and channel == "conda-forge" + show_archived + and channel == "conda-forge" and is_archived_repo(f"conda-forge/{name}") ): url_text = f"~~{url_text}~~" @@ -915,7 +983,9 @@ def disable_button(query): logger.error(exc, exc_info=True) with_patches_requested = with_patches and get_channel_config(channel).repodata_patches_package - patches_supported = get_channel_config(channel).artifact_discovery == ArtifactDiscoveryChoice.ANACONDA_API + patches_supported = ( + get_channel_config(channel).artifact_discovery == ArtifactDiscoveryChoice.ANACONDA_API + ) if with_patches_requested and not patches_supported: st.error( @@ -931,38 +1001,39 @@ def disable_button(query): st.markdown(f'## {"❌ " if yanked else ""}{data["name"]} {data["version"]}') if yanked: - st.error( - "This artifact has been removed from the index and is only available via URL." 
- ) + st.error("This artifact has been removed from the index and is only available via URL.") about = data.get("about") or data.get("rendered_recipe", {}).get("about", {}) dashboard_urls = { - dashboard_name: app_config().dashboards[dashboard_name].url_pattern.format( - channel=channel.split('/', 1)[-1], subdir=subdir, name=data["name"], version=data["version"] + dashboard_name: app_config() + .dashboards[dashboard_name] + .url_pattern.format( + channel=channel.split("/", 1)[-1], + subdir=subdir, + name=data["name"], + version=data["version"], ) for dashboard_name in get_channel_config(channel).dashboards } - dashboard_markdown_links = [ - f"[{name}]({url})" for name, url in dashboard_urls.items() - ] - dashboard_markdown_links = " · ".join(dashboard_markdown_links) if dashboard_markdown_links else "-" + dashboard_markdown_links = [f"[{name}]({url})" for name, url in dashboard_urls.items()] + dashboard_markdown_links = ( + " · ".join(dashboard_markdown_links) if dashboard_markdown_links else "-" + ) build_str = data.get("index", {}).get("build", "*N/A*") if build_str == "*N/A*": download = "*N/A*" else: _download_url = get_channel_config(channel).get_artifact_download_url( arch_subdir=subdir, - package_name=data['name'], - version=data['version'], + package_name=data["name"], + version=data["version"], build_string=build_str, - extension=extension + extension=extension, ) download = f"[artifact download]({_download_url})" maintainers = [] for user in ( - data.get("rendered_recipe", {}) - .get("extra", {}) - .get("recipe-maintainers", ["*N/A*"]) + data.get("rendered_recipe", {}).get("extra", {}).get("recipe-maintainers", ["*N/A*"]) ): if user == "*N/A*": maintainers.append(user) @@ -1056,7 +1127,9 @@ def disable_button(query): published = item.find("pubDate").text more_url = f"/?q={channel}/{name}" table.append(f"| {n} | [{name}]({more_url})| {version} | {platforms} | {published}") - st.markdown(f"## Latest {n} updates in 
[{channel}](https://anaconda.org/{channel.split('/', 1)[-1]})") + st.markdown( + f"## Latest {n} updates in [{channel}](https://anaconda.org/{channel.split('/', 1)[-1]})" + ) st.markdown(f"> Last update: {rss_ret.find('channel/pubDate').text}.") st.markdown("\n".join(table)) elif isinstance(data, str) and data.startswith("error:"): diff --git a/conda_metadata_app/pages/search_by_file_path_page.py b/conda_metadata_app/pages/search_by_file_path_page.py index 1948a8d..d98d343 100644 --- a/conda_metadata_app/pages/search_by_file_path_page.py +++ b/conda_metadata_app/pages/search_by_file_path_page.py @@ -1,3 +1,11 @@ +""" +This optional page connects to a Datasette instance to list artifacts +that contain a certain file path. + +The code and data for the Datasette instance can be found at +https://github.com/Quansight-Labs/conda-forge-paths. +""" + import time from inspect import cleandoc @@ -5,8 +13,6 @@ import streamlit as st from streamlit_searchbox import st_searchbox -from conda_metadata_app.app_config import AppConfig - @st.cache_resource(ttl="15m", max_entries=100) def autocomplete_paths(query): diff --git a/conda_metadata_app/version_order.py b/conda_metadata_app/version_order.py index f2a4431..ace7f29 100644 --- a/conda_metadata_app/version_order.py +++ b/conda_metadata_app/version_order.py @@ -8,6 +8,7 @@ :top-classes: conda.models.version.BaseSpec :parts: 1 """ + from __future__ import annotations import re @@ -200,9 +201,7 @@ def __init__(self, vstr): # This is an error because specifying only a local version is invalid.
# version[0] is empty because vstr.split("+") returns something like ['', '1.2'] if version[0] == "": - raise InvalidVersionSpec( - vstr, "Missing version before local version separator '+'" - ) + raise InvalidVersionSpec(vstr, "Missing version before local version separator '+'") if version[0][-1] == "_": # If the last character of version is "-" or "_", don't split that out @@ -252,9 +251,7 @@ def _eq(self, t1, t2): return True def __eq__(self, other): - return self._eq(self.version, other.version) and self._eq( - self.local, other.local - ) + return self._eq(self.version, other.version) and self._eq(self.local, other.local) def startswith(self, other): # Tests if the version lists match up to the last element in "other". diff --git a/docs/configuration.md b/docs/configuration.md index 2ee4aa2..447a1d5 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,6 +1,6 @@ # Configure `app_config.toml` -The `app_config.toml` is a configuration file that is used to setup the `conda-metadata-app` which allows you to browse metadata of conda packages. +The `app_config.toml` is a configuration file that is used to setup the `conda-metadata-app` which allows you to browse metadata of conda packages. ## Global Configuration ### File Path Search @@ -110,7 +110,7 @@ arch_subdir_discovery = { subdirs = ["linux-64", "osx-64", "win-64"] } Recommended to use `channeldata` if available, as it is more efficient. #### Repodata Patches - + `repodata_patches_package`: Specifies a package that includes patches for "repodata". It is expected to be available in the channel. ```toml @@ -213,11 +213,11 @@ anaconda = { url_pattern = "https://anaconda.org/{channel}/{name}/files?version= ghcr = { url_pattern = "https://github.com/orgs/channel-mirrors/packages/container/package/{channel}%2F{subdir}%2F{name}" } prefix = { url_pattern = "https://prefix.dev/channels/{channel}/packages/{name}" } ``` -The `url_pattern` can contain placeholders that are encompassed in curly braces `{}`. 
These placeholders get replaced with the relevant value when generating the URLs. Current available placeholders include `{channel}`, `{name}`, `{version}`, and `{subdir}`. +The `url_pattern` can contain placeholders that are encompassed in curly braces `{}`. These placeholders get replaced with the relevant value when generating the URLs. Current available placeholders include `{channel}`, `{name}`, `{version}`, and `{subdir}`. Note that the `app_config.toml` file is loaded by the application at runtime. Therefore, any changes made while the app is running will take effect only after you restart the application. Ensure to follow the correct syntax to avoid runtime errors. ## Complete Schema Refer to [app_config.py](../app_config.py) for the complete configuration schema. -You can configure your IDE to use the [app_config.schema.json](../app_config.schema.json) file for auto-completion and validation of the `app_config.toml` file. \ No newline at end of file +You can configure your IDE to use the [app_config.schema.json](../app_config.schema.json) file for auto-completion and validation of the `app_config.toml` file. 
diff --git a/pyproject.toml b/pyproject.toml index 2f23b66..0599202 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,3 +32,34 @@ dependencies = [ [project.urls] repository = "https://github.com/Quansight-Labs/conda-metadata-app" + +[tool.ruff] +target-version = "py312" +line-length = 99 + +[tool.ruff.lint] +# E, W = pycodestyle errors and warnings +# F = pyflakes +# I = isort +# D = pydocstyle +# UP = pyupgrade +# ISC = flake8-implicit-str-concat +# TCH = flake8-type-checking +# T10 = flake8-debugger +# FA = flake8-future-annotations +# see also https://docs.astral.sh/ruff/rules/ +select = ["E", "W", "F", "I", "D1", "UP", "ISC", "TCH", "T10", "FA"] +# E402 module level import not at top of file +# E501 line too long +# E722 do not use bare 'except' +# E731 do not assign a lambda expression, use a def +# D101 Missing docstring in public class +# D102 Missing docstring in public method +# D103 Missing docstring in public function +# D104 Missing docstring in public package +# D105 Missing docstring in magic method +# D107 Missing docstring in `__init__` +ignore = ["E402", "E501", "E722", "E731", "D101", "D102", "D103", "D104", "D105", "D107", "ISC001"] +extend-per-file-ignores = {"docs/*" = ["D1"], "tests/*" = ["D1"]} +pydocstyle = {convention = "pep257"} +flake8-type-checking = {exempt-modules = [], strict = true}