From 0849073cb9bea4f9c91b9957e467db7ad1e46227 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:23:12 +0100 Subject: [PATCH 1/3] Refactor ``BuildInfo`` --- sphinx/builders/html/__init__.py | 80 ++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index 2fa5f360a95..f16b24b5db6 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -8,6 +8,7 @@ import os import posixpath import re +import shutil import sys import types import warnings @@ -129,19 +130,30 @@ class BuildInfo: """ @classmethod - def load(cls: type[BuildInfo], f: IO[str]) -> BuildInfo: + def load(cls: type[BuildInfo], filename: Path, /) -> BuildInfo: try: - lines = f.readlines() - assert lines[0].rstrip() == '# Sphinx build info version 1' - assert lines[2].startswith('config: ') - assert lines[3].startswith('tags: ') - - build_info = BuildInfo() - build_info.config_hash = lines[2].split()[1].strip() - build_info.tags_hash = lines[3].split()[1].strip() - return build_info - except Exception as exc: - raise ValueError(__('build info file is broken: %r') % exc) from exc + content = filename.read_text(encoding="utf-8") + except OSError as exc: + msg = __('could not read build info file: %r') % exc + raise ValueError(msg) from exc + lines = content.splitlines() + + version = lines[0].rstrip() + if version != '# Sphinx build info version 1': + msg = __('failed to read broken build info file (unknown version)') + raise ValueError(msg) + + if not lines[2].startswith('config: '): + msg = __('failed to read broken build info file (missing config entry)') + raise ValueError(msg) + if not lines[3].startswith('tags: '): + msg = __('failed to read broken build info file (missing tags entry)') + raise ValueError(msg) + + build_info = BuildInfo() + build_info.config_hash = lines[2].removeprefix('config: ').strip() + build_info.tags_hash = lines[3].removeprefix('tags: ').strip() + return build_info def __init__( self, @@ -163,13 +175,15 @@ def __eq__(self, other: BuildInfo) -> bool: # type: ignore[override] return (self.config_hash == other.config_hash and self.tags_hash == other.tags_hash) - def dump(self, f: IO[str]) -> None: - f.write('# Sphinx build info version 1\n' - '# This file hashes the configuration used when building these files.' - ' When it is not found, a full rebuild will be done.\n' - 'config: %s\n' - 'tags: %s\n' % - (self.config_hash, self.tags_hash)) + def dump(self, filename: Path, /) -> None: + build_info = ( + '# Sphinx build info version 1\n' + '# This file records the configuration used when building these files. ' + 'When it is not found, a full rebuild will be done.\n' + f'config: {self.config_hash}\n' + f'tags: {self.tags_hash}\n' + ) + filename.write_text(build_info, encoding="utf-8") class StandaloneHTMLBuilder(Builder): @@ -396,18 +410,25 @@ def math_renderer_name(self) -> str | None: def get_outdated_docs(self) -> Iterator[str]: build_info_fname = self.outdir / '.buildinfo' try: - with open(build_info_fname, encoding="utf-8") as fp: - buildinfo = BuildInfo.load(fp) + build_info = BuildInfo.load(build_info_fname) + except ValueError as exc: + logger.warning(__('Failed to read build info file: %r'), exc) + else: + if self.build_info != build_info: + # log the mismatch and backup the old build info + build_info_backup = build_info_fname.with_name('.buildinfo.bak') + try: + shutil.move(build_info_fname, build_info_backup) + self.build_info.dump(build_info_fname) + except OSError: + pass # ignore errors + else: + # only log on success + msg = __('build_info mismatch, copying .buildinfo to .buildinfo.bak') + logger.info(bold(__('building [html]: ')) + msg) - if self.build_info != buildinfo: - logger.debug('[build target] did not match: build_info ') yield from self.env.found_docs return - except ValueError as exc: - logger.warning(__('Failed to read build info file: %r'), exc) - except OSError: - # ignore errors on reading - pass if self.templates: template_mtime = int(self.templates.newest_template_mtime() * 10**6) @@ -943,8 +964,7 @@ def copy_extra_files(self) -> None: def write_buildinfo(self) -> None: try: - with open(path.join(self.outdir, '.buildinfo'), 'w', encoding="utf-8") as fp: - self.build_info.dump(fp) + self.build_info.dump(self.outdir / '.buildinfo') except OSError as exc: logger.warning(__('Failed to write build info file: %r'), exc) From fda9a8af271c4a7af9a71dc3f8ff521f89de0975 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:26:35 +0100 Subject: [PATCH 2/3] Move ``BuildInfo`` to ``_build_info`` --- sphinx/builders/_epub_base.py | 3 +- sphinx/builders/html/__init__.py | 90 +------------------------- sphinx/builders/html/_build_info.py | 98 +++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 88 deletions(-) create mode 100644 sphinx/builders/html/_build_info.py diff --git a/sphinx/builders/_epub_base.py b/sphinx/builders/_epub_base.py index 444e9812ddd..15c4bd80b52 100644 --- a/sphinx/builders/_epub_base.py +++ b/sphinx/builders/_epub_base.py @@ -15,7 +15,8 @@ from docutils.utils import smartquotes from sphinx import addnodes -from sphinx.builders.html import BuildInfo, StandaloneHTMLBuilder +from sphinx.builders.html import StandaloneHTMLBuilder +from sphinx.builders.html._build_info import BuildInfo from sphinx.locale import __ from sphinx.util import logging from sphinx.util.display import status_iterator diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index f16b24b5db6..440843dcfdf 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -3,18 +3,16 @@ from __future__ import annotations import contextlib -import hashlib import html import os import posixpath import re import shutil import sys -import types import warnings from os import path from pathlib import Path -from typing import IO, TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any from urllib.parse import quote import docutils.readers.doctree @@ -32,6 +30,7 @@ _file_checksum, _JavaScript, ) +from sphinx.builders.html._build_info import BuildInfo from sphinx.config import ENUM, Config from sphinx.deprecation import _deprecation_warning from sphinx.domains import Domain, Index, IndexEntry @@ -64,16 +63,14 @@ from sphinx.writers.html5 import HTML5Translator if TYPE_CHECKING: - from collections.abc import Iterable, Iterator, Set + from collections.abc import Iterable, Iterator from typing import TypeAlias from docutils.nodes import Node from docutils.readers import Reader from sphinx.application import Sphinx - from sphinx.config import _ConfigRebuild from sphinx.environment import BuildEnvironment - from sphinx.util.tags import Tags from sphinx.util.typing import ExtensionMetadata #: the filename for the inventory of objects @@ -94,23 +91,6 @@ ] -def _stable_hash(obj: Any) -> str: - """Return a stable hash for a Python data structure. - - We can't just use the md5 of str(obj) as the order of collections - may be random. - """ - if isinstance(obj, dict): - obj = sorted(map(_stable_hash, obj.items())) - if isinstance(obj, list | tuple | set | frozenset): - obj = sorted(map(_stable_hash, obj)) - elif isinstance(obj, type | types.FunctionType): - # The default repr() of functions includes the ID, which is not ideal. - # We use the fully qualified name instead. - obj = f'{obj.__module__}.{obj.__qualname__}' - return hashlib.md5(str(obj).encode(), usedforsecurity=False).hexdigest() - - def convert_locale_to_language_tag(locale: str | None) -> str | None: """Convert a locale string to a language tag (ex. en_US -> en-US). @@ -122,70 +102,6 @@ def convert_locale_to_language_tag(locale: str | None) -> str | None: return None -class BuildInfo: - """buildinfo file manipulator. - - HTMLBuilder and its family are storing their own envdata to ``.buildinfo``. - This class is a manipulator for the file. - """ - - @classmethod - def load(cls: type[BuildInfo], filename: Path, /) -> BuildInfo: - try: - content = filename.read_text(encoding="utf-8") - except OSError as exc: - msg = __('could not read build info file: %r') % exc - raise ValueError(msg) from exc - lines = content.splitlines() - - version = lines[0].rstrip() - if version != '# Sphinx build info version 1': - msg = __('failed to read broken build info file (unknown version)') - raise ValueError(msg) - - if not lines[2].startswith('config: '): - msg = __('failed to read broken build info file (missing config entry)') - raise ValueError(msg) - if not lines[3].startswith('tags: '): - msg = __('failed to read broken build info file (missing tags entry)') - raise ValueError(msg) - - build_info = BuildInfo() - build_info.config_hash = lines[2].removeprefix('config: ').strip() - build_info.tags_hash = lines[3].removeprefix('tags: ').strip() - return build_info - - def __init__( - self, - config: Config | None = None, - tags: Tags | None = None, - config_categories: Set[_ConfigRebuild] = frozenset(), - ) -> None: - self.config_hash = '' - self.tags_hash = '' - - if config: - values = {c.name: c.value for c in config.filter(config_categories)} - self.config_hash = _stable_hash(values) - - if tags: - self.tags_hash = _stable_hash(sorted(tags)) - - def __eq__(self, other: BuildInfo) -> bool: # type: ignore[override] - return (self.config_hash == other.config_hash and - self.tags_hash == other.tags_hash) - - def dump(self, filename: Path, /) -> None: - build_info = ( - '# Sphinx build info version 1\n' - '# This file records the configuration used when building these files. ' - 'When it is not found, a full rebuild will be done.\n' - f'config: {self.config_hash}\n' - f'tags: {self.tags_hash}\n' - ) - filename.write_text(build_info, encoding="utf-8") - - class StandaloneHTMLBuilder(Builder): """ Builds standalone HTML docs. diff --git a/sphinx/builders/html/_build_info.py b/sphinx/builders/html/_build_info.py new file mode 100644 index 00000000000..ae98211382a --- /dev/null +++ b/sphinx/builders/html/_build_info.py @@ -0,0 +1,98 @@ +"""Record metadata for the build process.""" + +from __future__ import annotations + +import hashlib +import types +from typing import TYPE_CHECKING + +from sphinx.locale import __ + +if TYPE_CHECKING: + from collections.abc import Set + from pathlib import Path + from typing import Any + + from sphinx.config import Config, _ConfigRebuild + from sphinx.util.tags import Tags + + +class BuildInfo: + """buildinfo file manipulator. + + HTMLBuilder and its family are storing their own envdata to ``.buildinfo``. + This class is a manipulator for the file. + """ + + @classmethod + def load(cls: type[BuildInfo], filename: Path, /) -> BuildInfo: + try: + content = filename.read_text(encoding="utf-8") + except OSError as exc: + msg = __('could not read build info file: %r') % exc + raise ValueError(msg) from exc + lines = content.splitlines() + + version = lines[0].rstrip() + if version != '# Sphinx build info version 1': + msg = __('failed to read broken build info file (unknown version)') + raise ValueError(msg) + + if not lines[2].startswith('config: '): + msg = __('failed to read broken build info file (missing config entry)') + raise ValueError(msg) + if not lines[3].startswith('tags: '): + msg = __('failed to read broken build info file (missing tags entry)') + raise ValueError(msg) + + build_info = BuildInfo() + build_info.config_hash = lines[2].removeprefix('config: ').strip() + build_info.tags_hash = lines[3].removeprefix('tags: ').strip() + return build_info + + def __init__( + self, + config: Config | None = None, + tags: Tags | None = None, + config_categories: Set[_ConfigRebuild] = frozenset(), + ) -> None: + self.config_hash = '' + self.tags_hash = '' + + if config: + values = {c.name: c.value for c in config.filter(config_categories)} + self.config_hash = _stable_hash(values) + + if tags: + self.tags_hash = _stable_hash(sorted(tags)) + + def __eq__(self, other: BuildInfo) -> bool: # type: ignore[override] + return (self.config_hash == other.config_hash and + self.tags_hash == other.tags_hash) + + def dump(self, filename: Path, /) -> None: + build_info = ( + '# Sphinx build info version 1\n' + '# This file records the configuration used when building these files. ' + 'When it is not found, a full rebuild will be done.\n' + f'config: {self.config_hash}\n' + f'tags: {self.tags_hash}\n' + ) + filename.write_text(build_info, encoding="utf-8") + + +def _stable_hash(obj: Any) -> str: + """Return a stable hash for a Python data structure. + + We can't just use the md5 of str(obj) as the order of collections + may be random. + """ + if isinstance(obj, dict): + obj = sorted(map(_stable_hash, obj.items())) + if isinstance(obj, list | tuple | set | frozenset): + obj = sorted(map(_stable_hash, obj)) + elif isinstance(obj, type | types.FunctionType): + # The default repr() of functions includes the ID, which is not ideal. + # We use the fully qualified name instead. + obj = f'{obj.__module__}.{obj.__qualname__}' + return hashlib.md5(str(obj).encode(), usedforsecurity=False).hexdigest() From f54da9447fa186255d6fecde618bbff5e66c7a20 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 11 Aug 2024 22:03:25 +0100 Subject: [PATCH 3/3] Fix OSError --- sphinx/builders/html/__init__.py | 3 +++ sphinx/builders/html/_build_info.py | 6 +----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sphinx/builders/html/__init__.py b/sphinx/builders/html/__init__.py index 440843dcfdf..e6453bea7c3 100644 --- a/sphinx/builders/html/__init__.py +++ b/sphinx/builders/html/__init__.py @@ -329,6 +329,9 @@ def get_outdated_docs(self) -> Iterator[str]: build_info = BuildInfo.load(build_info_fname) except ValueError as exc: logger.warning(__('Failed to read build info file: %r'), exc) + except OSError: + # ignore errors on reading + pass else: if self.build_info != build_info: # log the mismatch and backup the old build info diff --git a/sphinx/builders/html/_build_info.py b/sphinx/builders/html/_build_info.py index ae98211382a..5b364c0d9fc 100644 --- a/sphinx/builders/html/_build_info.py +++ b/sphinx/builders/html/_build_info.py @@ -26,11 +26,7 @@ class BuildInfo: @classmethod def load(cls: type[BuildInfo], filename: Path, /) -> BuildInfo: - try: - content = filename.read_text(encoding="utf-8") - except OSError as exc: - msg = __('could not read build info file: %r') % exc - raise ValueError(msg) from exc + content = filename.read_text(encoding="utf-8") lines = content.splitlines() version = lines[0].rstrip()