From f35f0c1d713d06e402fc7316e1066c688407d34e Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 20:59:33 +0100 Subject: [PATCH 01/14] linkcheck: use py311 `StrEnum` for status codes --- sphinx/builders/linkcheck.py | 87 ++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index fdc3915cce4..1a6848eaebd 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -7,6 +7,7 @@ import re import socket import time +from enum import StrEnum from html.parser import HTMLParser from os import path from queue import PriorityQueue, Queue @@ -65,6 +66,17 @@ DEFAULT_DELAY = 60.0 +class LinkStatus(StrEnum): + BROKEN = 'broken' + IGNORED = 'ignored' + TIMEOUT = 'timeout' + RATE_LIMITED = 'rate-limited' + REDIRECTED = 'redirected' + UNCHECKED = 'unchecked' + UNKNOWN = 'unknown' + WORKING = 'working' + + class CheckExternalLinksBuilder(DummyBuilder): """ Checks for broken external links. @@ -109,13 +121,13 @@ def process_result(self, result: CheckResult) -> None: } self.write_linkstat(linkstat) - if result.status == 'unchecked': + if result.status == LinkStatus.UNCHECKED: return - if result.status == 'working' and result.message == 'old': + if result.status == LinkStatus.WORKING and result.message == 'old': return if result.lineno: logger.info('(%16s: line %4d) ', result.docname, result.lineno, nonl=True) - if result.status == 'ignored': + if result.status == LinkStatus.IGNORED: if result.message: logger.info(darkgray('-ignored- ') + result.uri + ': ' + result.message) else: @@ -125,9 +137,9 @@ def process_result(self, result: CheckResult) -> None: self.write_entry( 'local', result.docname, filename, result.lineno, result.uri ) - elif result.status == 'working': + elif result.status == LinkStatus.WORKING: logger.info(darkgreen('ok ') + result.uri + result.message) - elif result.status == 'timeout': + elif result.status == LinkStatus.TIMEOUT: if self.app.quiet: logger.warning( 'timeout ' + result.uri + result.message, @@ -138,14 +150,14 @@ def process_result(self, result: CheckResult) -> None: red('timeout ') + result.uri + red(' - ' + result.message) ) self.write_entry( - 'timeout', + LinkStatus.TIMEOUT, result.docname, filename, result.lineno, result.uri + ': ' + result.message, ) self.timed_out_hyperlinks += 1 - elif result.status == 'broken': + elif result.status == LinkStatus.BROKEN: if self.app.quiet: logger.warning( __('broken link: %s (%s)'), @@ -158,14 +170,14 @@ def process_result(self, result: CheckResult) -> None: red('broken ') + result.uri + red(' - ' + result.message) ) self.write_entry( - 'broken', + result.status, result.docname, filename, result.lineno, result.uri + ': ' + result.message, ) self.broken_hyperlinks += 1 - elif result.status == 'redirected': + elif result.status == LinkStatus.REDIRECTED: try: text, color = { 301: ('permanently', purple), @@ -306,7 +318,12 @@ def check(self, hyperlinks: dict[str, Hyperlink]) -> Iterator[CheckResult]: for hyperlink in hyperlinks.values(): if self.is_ignored_uri(hyperlink.uri): yield CheckResult( - hyperlink.uri, hyperlink.docname, hyperlink.lineno, 'ignored', '', 0 + uri=hyperlink.uri, + docname=hyperlink.docname, + lineno=hyperlink.lineno, + status=LinkStatus.IGNORED, + message='', + code=0, ) else: self.wqueue.put(CheckRequest(CHECK_IMMEDIATELY, hyperlink), False) @@ -388,11 +405,11 @@ def __init__( self.retries: int = config.linkcheck_retries self.rate_limit_timeout = config.linkcheck_rate_limit_timeout self._allow_unauthorized = config.linkcheck_allow_unauthorized - self._timeout_status: Literal['broken', 'timeout'] + self._timeout_status: Literal[LinkStatus.BROKEN, LinkStatus.TimeOut] if config.linkcheck_report_timeouts_as_broken: - self._timeout_status = 'broken' + self._timeout_status: LinkStatus.BROKEN else: - self._timeout_status = 'timeout' + self._timeout_status = LinkStatus.TIMEOUT self.user_agent = config.user_agent self.tls_verify = config.tls_verify @@ -429,7 +446,7 @@ def run(self) -> None: self.wqueue.task_done() continue status, info, code = self._check(docname, uri, hyperlink) - if status == 'rate-limited': + if status == LinkStatus.RATE_LIMITED: logger.info( darkgray('-rate limited- ') + uri + darkgray(' | sleeping...') ) @@ -448,26 +465,26 @@ def _check( f'{docname} matched {doc_matcher.pattern} from ' 'linkcheck_exclude_documents' ) - return 'ignored', info, 0 + return LinkStatus.IGNORED, info, 0 if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')): - return 'unchecked', '', 0 + return LinkStatus.UNCHECKED, '', 0 if not uri.startswith(('http:', 'https:')): if uri_re.match(uri): # Non-supported URI schemes (ex. ftp) - return 'unchecked', '', 0 + return LinkStatus.UNCHECKED, '', 0 src_dir = path.dirname(hyperlink.docpath) if path.exists(path.join(src_dir, uri)): - return 'working', '', 0 - return 'broken', '', 0 + return LinkStatus.WORKING, '', 0 + return LinkStatus.BROKEN, '', 0 # need to actually check the URI status: _StatusUnknown status, info, code = '', '', 0 for _ in range(self.retries): status, info, code = self._check_uri(uri, hyperlink) - if status != 'broken': + if status != LinkStatus.BROKEN: break return status, info, code @@ -536,10 +553,14 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: try: found = contains_anchor(response, anchor) except UnicodeDecodeError: - return 'ignored', 'unable to decode response content', 0 + return ( + LinkStatus.IGNORED, + 'unable to decode response content', + 0, + ) if not found: return ( - 'broken', + LinkStatus.BROKEN, __("Anchor '%s' not found") % quote(anchor), 0, ) @@ -560,7 +581,7 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: except SSLError as err: # SSL failure; report that the link is broken. - return 'broken', str(err), 0 + return LinkStatus.BROKEN, str(err), 0 except (ConnectionError, TooManyRedirects) as err: # Servers drop the connection on HEAD requests, causing @@ -574,20 +595,20 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: # Unauthorized: the client did not provide required credentials if status_code == 401: if self._allow_unauthorized: - return 'working', 'unauthorized', 0 + return LinkStatus.WORKING, 'unauthorized', 0 else: - return 'broken', 'unauthorized', 0 + return LinkStatus.BROKEN, 'unauthorized', 0 # Rate limiting; back-off if allowed, or report failure otherwise if status_code == 429: if next_check := self.limit_rate(response_url, retry_after): self.wqueue.put(CheckRequest(next_check, hyperlink), False) - return 'rate-limited', '', 0 - return 'broken', error_message, 0 + return LinkStatus.RATE_LIMITED, '', 0 + return LinkStatus.BROKEN, error_message, 0 # Don't claim success/failure during server-side outages if status_code == 503: - return 'ignored', 'service unavailable', 0 + return LinkStatus.IGNORED, 'service unavailable', 0 # For most HTTP failures, continue attempting alternate retrieval methods continue @@ -595,12 +616,12 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: except Exception as err: # Unhandled exception (intermittent or permanent); report that # the link is broken. - return 'broken', str(err), 0 + return LinkStatus.BROKEN, str(err), 0 else: # All available retrieval methods have been exhausted; report # that the link is broken. - return 'broken', error_message, 0 + return LinkStatus.BROKEN, error_message, 0 # Success; clear rate limits for the origin netloc = urlsplit(req_url).netloc @@ -610,11 +631,11 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: (response_url.rstrip('/') == req_url.rstrip('/')) or _allowed_redirect(req_url, response_url, self.allowed_redirects) ): # fmt: skip - return 'working', '', 0 + return LinkStatus.WORKING, '', 0 elif redirect_status_code is not None: - return 'redirected', response_url, redirect_status_code + return LinkStatus.REDIRECTED, response_url, redirect_status_code else: - return 'redirected', response_url, 0 + return LinkStatus.REDIRECTED, response_url, 0 def limit_rate(self, response_url: str, retry_after: str | None) -> float | None: delay = DEFAULT_DELAY From 139775a5ad3f56b83f5e487a0871868cc549adf0 Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:06:08 +0100 Subject: [PATCH 02/14] Fixup: `TIMEOUT`, not `TimeOut` --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 1a6848eaebd..92891b17b7e 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -405,7 +405,7 @@ def __init__( self.retries: int = config.linkcheck_retries self.rate_limit_timeout = config.linkcheck_rate_limit_timeout self._allow_unauthorized = config.linkcheck_allow_unauthorized - self._timeout_status: Literal[LinkStatus.BROKEN, LinkStatus.TimeOut] + self._timeout_status: Literal[LinkStatus.BROKEN, LinkStatus.TIMEOUT] if config.linkcheck_report_timeouts_as_broken: self._timeout_status: LinkStatus.BROKEN else: From f35a605338f9c81947a3a5055fa3f47ff78cf9d6 Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:09:39 +0100 Subject: [PATCH 03/14] Fixup: assignment, not typehint --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 92891b17b7e..0ed109ab91d 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -407,7 +407,7 @@ def __init__( self._allow_unauthorized = config.linkcheck_allow_unauthorized self._timeout_status: Literal[LinkStatus.BROKEN, LinkStatus.TIMEOUT] if config.linkcheck_report_timeouts_as_broken: - self._timeout_status: LinkStatus.BROKEN + self._timeout_status = LinkStatus.BROKEN else: self._timeout_status = LinkStatus.TIMEOUT From d2491fc57aa6ae7c906889ebfa015d47899f3d79 Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:10:22 +0100 Subject: [PATCH 04/14] Fixup: re-declare `_Status` type in terms of `LinkStatus` values --- sphinx/builders/linkcheck.py | 39 ++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 0ed109ab91d..cb43df14440 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -28,6 +28,18 @@ from sphinx.util.http_date import rfc1123_to_epoch from sphinx.util.nodes import get_node_line + +class LinkStatus(StrEnum): + BROKEN = 'broken' + IGNORED = 'ignored' + TIMEOUT = 'timeout' + RATE_LIMITED = 'rate-limited' + REDIRECTED = 'redirected' + UNCHECKED = 'unchecked' + UNKNOWN = 'unknown' + WORKING = 'working' + + if TYPE_CHECKING: from collections.abc import Callable, Iterator from typing import Any, Literal, TypeAlias @@ -40,14 +52,14 @@ from sphinx.util.typing import ExtensionMetadata _Status: TypeAlias = Literal[ - 'broken', - 'ignored', - 'local', - 'rate-limited', - 'redirected', - 'timeout', - 'unchecked', - 'working', + LinkStatus.BROKEN, + LinkStatus.IGNORED, + LinkStatus.TIMEOUT, + LinkStatus.RATE_LIMITED, + LinkStatus.REDIRECTED, + LinkStatus.UNCHECKED, + LinkStatus.UNKNOWN, + LinkStatus.WORKING, ] _StatusUnknown: TypeAlias = _Status | Literal[''] _URIProperties: TypeAlias = tuple[_Status, str, int] @@ -66,17 +78,6 @@ DEFAULT_DELAY = 60.0 -class LinkStatus(StrEnum): - BROKEN = 'broken' - IGNORED = 'ignored' - TIMEOUT = 'timeout' - RATE_LIMITED = 'rate-limited' - REDIRECTED = 'redirected' - UNCHECKED = 'unchecked' - UNKNOWN = 'unknown' - WORKING = 'working' - - class CheckExternalLinksBuilder(DummyBuilder): """ Checks for broken external links. From d093ff73026df7dd96cd132b20e90228c4d01b7f Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:13:22 +0100 Subject: [PATCH 05/14] Fixup: restore `local` value to `_Status` type --- sphinx/builders/linkcheck.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index cb43df14440..20112725201 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -54,6 +54,7 @@ class LinkStatus(StrEnum): _Status: TypeAlias = Literal[ LinkStatus.BROKEN, LinkStatus.IGNORED, + LinkStatus.LOCAL, LinkStatus.TIMEOUT, LinkStatus.RATE_LIMITED, LinkStatus.REDIRECTED, @@ -133,10 +134,10 @@ def process_result(self, result: CheckResult) -> None: logger.info(darkgray('-ignored- ') + result.uri + ': ' + result.message) else: logger.info(darkgray('-ignored- ') + result.uri) - elif result.status == 'local': + elif result.status == LinkStatus.LOCAL: logger.info(darkgray('-local- ') + result.uri) self.write_entry( - 'local', result.docname, filename, result.lineno, result.uri + result.status, result.docname, filename, result.lineno, result.uri ) elif result.status == LinkStatus.WORKING: logger.info(darkgreen('ok ') + result.uri + result.message) From f79e444d3524557d8749098d7e2f846c49d433d4 Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:23:17 +0100 Subject: [PATCH 06/14] Fixup: add `LOCAL` to `LinkStatus` enum Relates-to commit d093ff73026df7dd96cd132b20e90228c4d01b7f. --- sphinx/builders/linkcheck.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 20112725201..ff553a1af02 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -32,6 +32,7 @@ class LinkStatus(StrEnum): BROKEN = 'broken' IGNORED = 'ignored' + LOCAL = 'local' TIMEOUT = 'timeout' RATE_LIMITED = 'rate-limited' REDIRECTED = 'redirected' From 3cda29a210a0d95a6cb17d9c447cb2c51cd04441 Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:25:58 +0100 Subject: [PATCH 07/14] Consistency: always call `write_entry` with specific status code --- sphinx/builders/linkcheck.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index ff553a1af02..d6754c3d7a7 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -138,7 +138,11 @@ def process_result(self, result: CheckResult) -> None: elif result.status == LinkStatus.LOCAL: logger.info(darkgray('-local- ') + result.uri) self.write_entry( - result.status, result.docname, filename, result.lineno, result.uri + LinkStatus.LOCAL, + result.docname, + filename, + result.lineno, + result.uri, ) elif result.status == LinkStatus.WORKING: logger.info(darkgreen('ok ') + result.uri + result.message) @@ -173,7 +177,7 @@ def process_result(self, result: CheckResult) -> None: red('broken ') + result.uri + red(' - ' + result.message) ) self.write_entry( - result.status, + LinkStatus.BROKEN, result.docname, filename, result.lineno, From f039188dbe4d263276b203f650b8c745ea2f112b Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:40:19 +0100 Subject: [PATCH 08/14] Refactor-out exhaustive listing of enum statuses in TypeAlias --- sphinx/builders/linkcheck.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index d6754c3d7a7..d3d7d55de6d 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -43,7 +43,7 @@ class LinkStatus(StrEnum): if TYPE_CHECKING: from collections.abc import Callable, Iterator - from typing import Any, Literal, TypeAlias + from typing import Any, Literal, Union, TypeAlias from requests import Response @@ -52,20 +52,10 @@ class LinkStatus(StrEnum): from sphinx.util._pathlib import _StrPath from sphinx.util.typing import ExtensionMetadata - _Status: TypeAlias = Literal[ - LinkStatus.BROKEN, - LinkStatus.IGNORED, - LinkStatus.LOCAL, - LinkStatus.TIMEOUT, - LinkStatus.RATE_LIMITED, - LinkStatus.REDIRECTED, - LinkStatus.UNCHECKED, - LinkStatus.UNKNOWN, - LinkStatus.WORKING, - ] - _StatusUnknown: TypeAlias = _Status | Literal[''] - _URIProperties: TypeAlias = tuple[_Status, str, int] - _URIPropertiesUnknown: TypeAlias = tuple[_StatusUnknown, str, int] + _Status: TypeAlias = LinkStatus + _StatusUnknown: TypeAlias = Union[LinkStatus, None] + _URIProperties: TypeAlias = tuple[LinkStatus, str, int] + _URIPropertiesUnknown: TypeAlias = tuple[LinkStatus | None, str, int] logger = logging.getLogger(__name__) @@ -114,7 +104,7 @@ def finish(self) -> None: def process_result(self, result: CheckResult) -> None: filename = self.env.doc2path(result.docname, False) - linkstat: dict[str, str | int | _Status] = { + linkstat: dict[str, str | int | _StatusUnknown] = { 'filename': str(filename), 'lineno': result.lineno, 'status': result.status, @@ -218,7 +208,7 @@ def process_result(self, result: CheckResult) -> None: msg = f'Unknown status {result.status!r}.' raise ValueError(msg) - def write_linkstat(self, data: dict[str, str | int]) -> None: + def write_linkstat(self, data: dict[str, str | int | _StatusUnknown]) -> None: self.json_outfile.write(json.dumps(data)) self.json_outfile.write('\n') @@ -488,7 +478,7 @@ def _check( # need to actually check the URI status: _StatusUnknown - status, info, code = '', '', 0 + status, info, code = None, '', 0 for _ in range(self.retries): status, info, code = self._check_uri(uri, hyperlink) if status != LinkStatus.BROKEN: From d13d69660ba421c5578b84cad02fa9d6adf56820 Mon Sep 17 00:00:00 2001 From: James Addison Date: Sat, 19 Oct 2024 21:42:09 +0100 Subject: [PATCH 09/14] Factor-out `typing.Union` usage --- sphinx/builders/linkcheck.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index d3d7d55de6d..2be99bf6731 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -43,7 +43,7 @@ class LinkStatus(StrEnum): if TYPE_CHECKING: from collections.abc import Callable, Iterator - from typing import Any, Literal, Union, TypeAlias + from typing import Any, Literal, TypeAlias from requests import Response @@ -53,7 +53,7 @@ class LinkStatus(StrEnum): from sphinx.util.typing import ExtensionMetadata _Status: TypeAlias = LinkStatus - _StatusUnknown: TypeAlias = Union[LinkStatus, None] + _StatusUnknown: TypeAlias = LinkStatus | None _URIProperties: TypeAlias = tuple[LinkStatus, str, int] _URIPropertiesUnknown: TypeAlias = tuple[LinkStatus | None, str, int] From 55e42cbf5a5995c8c7f4edc0c5a0a2751ee26a3d Mon Sep 17 00:00:00 2001 From: James Addison Date: Sun, 20 Oct 2024 11:57:22 +0100 Subject: [PATCH 10/14] Refactor: apply code review suggestions --- sphinx/builders/linkcheck.py | 114 +++++++++++++++++------------------ 1 file changed, 54 insertions(+), 60 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 2be99bf6731..17b4cd87d8d 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -28,19 +28,6 @@ from sphinx.util.http_date import rfc1123_to_epoch from sphinx.util.nodes import get_node_line - -class LinkStatus(StrEnum): - BROKEN = 'broken' - IGNORED = 'ignored' - LOCAL = 'local' - TIMEOUT = 'timeout' - RATE_LIMITED = 'rate-limited' - REDIRECTED = 'redirected' - UNCHECKED = 'unchecked' - UNKNOWN = 'unknown' - WORKING = 'working' - - if TYPE_CHECKING: from collections.abc import Callable, Iterator from typing import Any, Literal, TypeAlias @@ -52,10 +39,21 @@ class LinkStatus(StrEnum): from sphinx.util._pathlib import _StrPath from sphinx.util.typing import ExtensionMetadata - _Status: TypeAlias = LinkStatus - _StatusUnknown: TypeAlias = LinkStatus | None - _URIProperties: TypeAlias = tuple[LinkStatus, str, int] - _URIPropertiesUnknown: TypeAlias = tuple[LinkStatus | None, str, int] + _URIProperties: TypeAlias = tuple["_Status", str, int] + _URIPropertiesUnknown: TypeAlias = tuple["_Status" | None, str, int] + + +class _Status(StrEnum): + BROKEN = 'broken' + IGNORED = 'ignored' + LOCAL = 'local' + TIMEOUT = 'timeout' + RATE_LIMITED = 'rate-limited' + REDIRECTED = 'redirected' + UNCHECKED = 'unchecked' + UNKNOWN = 'unknown' + WORKING = 'working' + logger = logging.getLogger(__name__) @@ -104,7 +102,7 @@ def finish(self) -> None: def process_result(self, result: CheckResult) -> None: filename = self.env.doc2path(result.docname, False) - linkstat: dict[str, str | int | _StatusUnknown] = { + linkstat: dict[str, str | int | _Status | None] = { 'filename': str(filename), 'lineno': result.lineno, 'status': result.status, @@ -114,29 +112,29 @@ def process_result(self, result: CheckResult) -> None: } self.write_linkstat(linkstat) - if result.status == LinkStatus.UNCHECKED: + if result.status == _Status.UNCHECKED: return - if result.status == LinkStatus.WORKING and result.message == 'old': + if result.status == _Status.WORKING and result.message == 'old': return if result.lineno: logger.info('(%16s: line %4d) ', result.docname, result.lineno, nonl=True) - if result.status == LinkStatus.IGNORED: + if result.status == _Status.IGNORED: if result.message: logger.info(darkgray('-ignored- ') + result.uri + ': ' + result.message) else: logger.info(darkgray('-ignored- ') + result.uri) - elif result.status == LinkStatus.LOCAL: + elif result.status == _Status.LOCAL: logger.info(darkgray('-local- ') + result.uri) self.write_entry( - LinkStatus.LOCAL, + _Status.LOCAL, result.docname, filename, result.lineno, result.uri, ) - elif result.status == LinkStatus.WORKING: + elif result.status == _Status.WORKING: logger.info(darkgreen('ok ') + result.uri + result.message) - elif result.status == LinkStatus.TIMEOUT: + elif result.status == _Status.TIMEOUT: if self.app.quiet: logger.warning( 'timeout ' + result.uri + result.message, @@ -147,14 +145,14 @@ def process_result(self, result: CheckResult) -> None: red('timeout ') + result.uri + red(' - ' + result.message) ) self.write_entry( - LinkStatus.TIMEOUT, + _Status.TIMEOUT, result.docname, filename, result.lineno, result.uri + ': ' + result.message, ) self.timed_out_hyperlinks += 1 - elif result.status == LinkStatus.BROKEN: + elif result.status == _Status.BROKEN: if self.app.quiet: logger.warning( __('broken link: %s (%s)'), @@ -167,14 +165,14 @@ def process_result(self, result: CheckResult) -> None: red('broken ') + result.uri + red(' - ' + result.message) ) self.write_entry( - LinkStatus.BROKEN, + _Status.BROKEN, result.docname, filename, result.lineno, result.uri + ': ' + result.message, ) self.broken_hyperlinks += 1 - elif result.status == LinkStatus.REDIRECTED: + elif result.status == _Status.REDIRECTED: try: text, color = { 301: ('permanently', purple), @@ -208,7 +206,7 @@ def process_result(self, result: CheckResult) -> None: msg = f'Unknown status {result.status!r}.' raise ValueError(msg) - def write_linkstat(self, data: dict[str, str | int | _StatusUnknown]) -> None: + def write_linkstat(self, data: dict[str, str | int | _Status | None]) -> None: self.json_outfile.write(json.dumps(data)) self.json_outfile.write('\n') @@ -318,7 +316,7 @@ def check(self, hyperlinks: dict[str, Hyperlink]) -> Iterator[CheckResult]: uri=hyperlink.uri, docname=hyperlink.docname, lineno=hyperlink.lineno, - status=LinkStatus.IGNORED, + status=_Status.IGNORED, message='', code=0, ) @@ -359,7 +357,7 @@ class CheckResult(NamedTuple): uri: str docname: str lineno: int - status: _StatusUnknown + status: _Status | None message: str code: int @@ -402,11 +400,11 @@ def __init__( self.retries: int = config.linkcheck_retries self.rate_limit_timeout = config.linkcheck_rate_limit_timeout self._allow_unauthorized = config.linkcheck_allow_unauthorized - self._timeout_status: Literal[LinkStatus.BROKEN, LinkStatus.TIMEOUT] + self._timeout_status: Literal[_Status.BROKEN, _Status.TIMEOUT] if config.linkcheck_report_timeouts_as_broken: - self._timeout_status = LinkStatus.BROKEN + self._timeout_status = _Status.BROKEN else: - self._timeout_status = LinkStatus.TIMEOUT + self._timeout_status = _Status.TIMEOUT self.user_agent = config.user_agent self.tls_verify = config.tls_verify @@ -443,7 +441,7 @@ def run(self) -> None: self.wqueue.task_done() continue status, info, code = self._check(docname, uri, hyperlink) - if status == LinkStatus.RATE_LIMITED: + if status == _Status.RATE_LIMITED: logger.info( darkgray('-rate limited- ') + uri + darkgray(' | sleeping...') ) @@ -462,26 +460,26 @@ def _check( f'{docname} matched {doc_matcher.pattern} from ' 'linkcheck_exclude_documents' ) - return LinkStatus.IGNORED, info, 0 + return _Status.IGNORED, info, 0 if len(uri) == 0 or uri.startswith(('#', 'mailto:', 'tel:')): - return LinkStatus.UNCHECKED, '', 0 + return _Status.UNCHECKED, '', 0 if not uri.startswith(('http:', 'https:')): if uri_re.match(uri): # Non-supported URI schemes (ex. ftp) - return LinkStatus.UNCHECKED, '', 0 + return _Status.UNCHECKED, '', 0 src_dir = path.dirname(hyperlink.docpath) if path.exists(path.join(src_dir, uri)): - return LinkStatus.WORKING, '', 0 - return LinkStatus.BROKEN, '', 0 + return _Status.WORKING, '', 0 + return _Status.BROKEN, '', 0 # need to actually check the URI - status: _StatusUnknown + status: _Status | None status, info, code = None, '', 0 for _ in range(self.retries): status, info, code = self._check_uri(uri, hyperlink) - if status != LinkStatus.BROKEN: + if status != _Status.BROKEN: break return status, info, code @@ -550,14 +548,10 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: try: found = contains_anchor(response, anchor) except UnicodeDecodeError: - return ( - LinkStatus.IGNORED, - 'unable to decode response content', - 0, - ) + return _Status.IGNORED, 'unable to decode response content', 0 if not found: return ( - LinkStatus.BROKEN, + _Status.BROKEN, __("Anchor '%s' not found") % quote(anchor), 0, ) @@ -578,7 +572,7 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: except SSLError as err: # SSL failure; report that the link is broken. - return LinkStatus.BROKEN, str(err), 0 + return _Status.BROKEN, str(err), 0 except (ConnectionError, TooManyRedirects) as err: # Servers drop the connection on HEAD requests, causing @@ -592,20 +586,20 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: # Unauthorized: the client did not provide required credentials if status_code == 401: if self._allow_unauthorized: - return LinkStatus.WORKING, 'unauthorized', 0 + return _Status.WORKING, 'unauthorized', 0 else: - return LinkStatus.BROKEN, 'unauthorized', 0 + return _Status.BROKEN, 'unauthorized', 0 # Rate limiting; back-off if allowed, or report failure otherwise if status_code == 429: if next_check := self.limit_rate(response_url, retry_after): self.wqueue.put(CheckRequest(next_check, hyperlink), False) - return LinkStatus.RATE_LIMITED, '', 0 - return LinkStatus.BROKEN, error_message, 0 + return _Status.RATE_LIMITED, '', 0 + return _Status.BROKEN, error_message, 0 # Don't claim success/failure during server-side outages if status_code == 503: - return LinkStatus.IGNORED, 'service unavailable', 0 + return _Status.IGNORED, 'service unavailable', 0 # For most HTTP failures, continue attempting alternate retrieval methods continue @@ -613,12 +607,12 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: except Exception as err: # Unhandled exception (intermittent or permanent); report that # the link is broken. - return LinkStatus.BROKEN, str(err), 0 + return _Status.BROKEN, str(err), 0 else: # All available retrieval methods have been exhausted; report # that the link is broken. - return LinkStatus.BROKEN, error_message, 0 + return _Status.BROKEN, error_message, 0 # Success; clear rate limits for the origin netloc = urlsplit(req_url).netloc @@ -628,11 +622,11 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: (response_url.rstrip('/') == req_url.rstrip('/')) or _allowed_redirect(req_url, response_url, self.allowed_redirects) ): # fmt: skip - return LinkStatus.WORKING, '', 0 + return _Status.WORKING, '', 0 elif redirect_status_code is not None: - return LinkStatus.REDIRECTED, response_url, redirect_status_code + return _Status.REDIRECTED, response_url, redirect_status_code else: - return LinkStatus.REDIRECTED, response_url, 0 + return _Status.REDIRECTED, response_url, 0 def limit_rate(self, response_url: str, retry_after: str | None) -> float | None: delay = DEFAULT_DELAY From 2da2fe2b618fa9310ecbedba5c2f94ba76cf3b1f Mon Sep 17 00:00:00 2001 From: James Addison Date: Sun, 20 Oct 2024 12:00:56 +0100 Subject: [PATCH 11/14] Lint fixup: apply `ruff format` --- sphinx/builders/linkcheck.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 17b4cd87d8d..e158d6f7b6f 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -39,8 +39,8 @@ from sphinx.util._pathlib import _StrPath from sphinx.util.typing import ExtensionMetadata - _URIProperties: TypeAlias = tuple["_Status", str, int] - _URIPropertiesUnknown: TypeAlias = tuple["_Status" | None, str, int] + _URIProperties: TypeAlias = tuple['_Status', str, int] + _URIPropertiesUnknown: TypeAlias = tuple['_Status' | None, str, int] class _Status(StrEnum): @@ -548,7 +548,11 @@ def _check_uri(self, uri: str, hyperlink: Hyperlink) -> _URIProperties: try: found = contains_anchor(response, anchor) except UnicodeDecodeError: - return _Status.IGNORED, 'unable to decode response content', 0 + return ( + _Status.IGNORED, + 'unable to decode response content', + 0, + ) if not found: return ( _Status.BROKEN, From 9e7d767f927eb4dc3c01de2e98cb142ae884186a Mon Sep 17 00:00:00 2001 From: James Addison Date: Sun, 20 Oct 2024 12:11:00 +0100 Subject: [PATCH 12/14] Change: utilise `unknown` status code instead of `None` for initial/placeholder status value --- sphinx/builders/linkcheck.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index e158d6f7b6f..e7a3ff7c346 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -40,7 +40,6 @@ from sphinx.util.typing import ExtensionMetadata _URIProperties: TypeAlias = tuple['_Status', str, int] - _URIPropertiesUnknown: TypeAlias = tuple['_Status' | None, str, int] class _Status(StrEnum): @@ -102,7 +101,7 @@ def finish(self) -> None: def process_result(self, result: CheckResult) -> None: filename = self.env.doc2path(result.docname, False) - linkstat: dict[str, str | int | _Status | None] = { + linkstat: dict[str, str | int | _Status] = { 'filename': str(filename), 'lineno': result.lineno, 'status': result.status, @@ -206,7 +205,7 @@ def process_result(self, result: CheckResult) -> None: msg = f'Unknown status {result.status!r}.' raise ValueError(msg) - def write_linkstat(self, data: dict[str, str | int | _Status | None]) -> None: + def write_linkstat(self, data: dict[str, str | int | _Status]) -> None: self.json_outfile.write(json.dumps(data)) self.json_outfile.write('\n') @@ -357,7 +356,7 @@ class CheckResult(NamedTuple): uri: str docname: str lineno: int - status: _Status | None + status: _Status message: str code: int @@ -451,7 +450,7 @@ def run(self) -> None: def _check( self, docname: str, uri: str, hyperlink: Hyperlink - ) -> _URIPropertiesUnknown: + ) -> _URIProperties: # check for various conditions without bothering the network for doc_matcher in self.documents_exclude: @@ -475,8 +474,8 @@ def _check( return _Status.BROKEN, '', 0 # need to actually check the URI - status: _Status | None - status, info, code = None, '', 0 + status: _Status + status, info, code = _Status.UNKNOWN, '', 0 for _ in range(self.retries): status, info, code = self._check_uri(uri, hyperlink) if status != _Status.BROKEN: From df4219bfe13a254f165adecad93e014bcc88913f Mon Sep 17 00:00:00 2001 From: James Addison Date: Sun, 20 Oct 2024 12:14:59 +0100 Subject: [PATCH 13/14] Lint fixup: apply `ruff format` --- sphinx/builders/linkcheck.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index e7a3ff7c346..46015a5c4c2 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -448,9 +448,7 @@ def run(self) -> None: self.rqueue.put(CheckResult(uri, docname, lineno, status, info, code)) self.wqueue.task_done() - def _check( - self, docname: str, uri: str, hyperlink: Hyperlink - ) -> _URIProperties: + def _check(self, docname: str, uri: str, hyperlink: Hyperlink) -> _URIProperties: # check for various conditions without bothering the network for doc_matcher in self.documents_exclude: From e6838adf8ac8e6bc4a44d7c51f8088b181e4b566 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 20 Oct 2024 18:48:35 +0100 Subject: [PATCH 14/14] Alphabetical order --- sphinx/builders/linkcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinx/builders/linkcheck.py b/sphinx/builders/linkcheck.py index 46015a5c4c2..c74fa12a98a 100644 --- a/sphinx/builders/linkcheck.py +++ b/sphinx/builders/linkcheck.py @@ -46,9 +46,9 @@ class _Status(StrEnum): BROKEN = 'broken' IGNORED = 'ignored' LOCAL = 'local' - TIMEOUT = 'timeout' RATE_LIMITED = 'rate-limited' REDIRECTED = 'redirected' + TIMEOUT = 'timeout' UNCHECKED = 'unchecked' UNKNOWN = 'unknown' WORKING = 'working'