Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Use parseable buildinfo #12741

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions sphinx/builders/html/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,15 +336,20 @@ def get_outdated_docs(self) -> Iterator[str]:
if self.build_info != build_info:
# log the mismatch and backup the old build info
build_info_backup = build_info_fname.with_name('.buildinfo.bak')
bad_keys = self.build_info.differing_keys(build_info)
msg = __('all docs marked outdated due to build_info mismatch')
if bad_keys:
msg += __(' in %s config key(s): ') % len(bad_keys)
bad_keys = bad_keys[:10] + ([] if len(bad_keys) <= 10 else ['...'])
msg += repr(bad_keys)
try:
shutil.move(build_info_fname, build_info_backup)
self.build_info.dump(build_info_fname)
except OSError:
pass # ignore errors
else:
# only log on success
msg = __('build_info mismatch, copying .buildinfo to .buildinfo.bak')
logger.info(bold(__('building [html]: ')) + msg)
msg += __(", copying .buildinfo to .buildinfo.bak")
logger.info(bold(__('building [html]: ')) + msg)

yield from self.env.found_docs
return
Expand All @@ -370,7 +375,7 @@ def get_outdated_docs(self) -> Iterator[str]:
template_mtime = 0
for docname in self.env.found_docs:
if docname not in self.env.all_docs:
logger.debug('[build target] did not in env: %r', docname)
logger.debug('[build target] did not find in env: %r', docname)
yield docname
continue
targetname = self.get_outfilename(docname)
Expand All @@ -379,15 +384,16 @@ def get_outdated_docs(self) -> Iterator[str]:
except Exception:
targetmtime = 0
try:
srcmtime = max(_last_modified_time(self.env.doc2path(docname)), template_mtime)
docpath_mtime = _last_modified_time(self.env.doc2path(docname))
srcmtime = max(docpath_mtime, template_mtime)
if srcmtime > targetmtime:
logger.debug(
'[build target] targetname %r(%s), template(%s), docname %r(%s)',
'[build target] targetname %r(%s) < max(template(%s), docname %r(%s))',
targetname,
_format_rfc3339_microseconds(targetmtime),
_format_rfc3339_microseconds(template_mtime),
docname,
_format_rfc3339_microseconds(_last_modified_time(self.env.doc2path(docname))),
_format_rfc3339_microseconds(docpath_mtime),
)
yield docname
except OSError:
Expand Down
53 changes: 38 additions & 15 deletions sphinx/builders/html/_build_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

import hashlib
import json
import types
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -30,7 +30,9 @@ def load(cls: type[BuildInfo], filename: Path, /) -> BuildInfo:
lines = content.splitlines()

version = lines[0].rstrip()
if version != '# Sphinx build info version 1':
if version == '# Sphinx build info version 1':
return BuildInfo() # ignore outdated build info file
if version != '# Sphinx build info version 2':
msg = __('failed to read broken build info file (unknown version)')
raise ValueError(msg)

Expand All @@ -57,38 +59,59 @@ def __init__(

if config:
values = {c.name: c.value for c in config.filter(config_categories)}
self.config_hash = _stable_hash(values)
self.config_hash = _stable_str(values)

if tags:
self.tags_hash = _stable_hash(sorted(tags))
self.tags_hash = _stable_str(sorted(tags))

def __eq__(self, other: BuildInfo) -> bool: # type: ignore[override]
return (self.config_hash == other.config_hash and
self.tags_hash == other.tags_hash)
return (self.config_hash == other.config_hash
and self.tags_hash == other.tags_hash)

def dump(self, filename: Path, /) -> None:
build_info = (
'# Sphinx build info version 1\n'
'# Sphinx build info version 2\n'
'# This file records the configuration used when building these files. '
'When it is not found, a full rebuild will be done.\n'
f'config: {self.config_hash}\n'
f'tags: {self.tags_hash}\n'
)
filename.write_text(build_info, encoding="utf-8")

def differing_keys(self, other: BuildInfo) -> list[str]:
    """Return the sorted config keys whose values differ between two build infos.

    Both ``config_hash`` attributes are expected to hold a JSON object
    mapping config names to their serialised values.  A key is reported
    when it is missing from either side or when the two values are unequal.

    ``config_hash`` is only assigned when a config is supplied, so either
    side may hold something that is not a JSON object (presumably an empty
    string -- the default is not visible here).  Such a payload is treated
    as an empty mapping instead of raising, because this method is used
    only to build a diagnostic log message and must not abort the build.

    :param other: the build info to compare against.
    :returns: the differing key names, in sorted order.
    """

    def _as_mapping(serialised: object) -> dict:
        # Decode one payload; fall back to {} for anything that is not a JSON object.
        if not serialised:
            return {}
        try:
            decoded = json.loads(serialised)  # type: ignore[arg-type]
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError subclass.
            return {}
        return decoded if isinstance(decoded, dict) else {}

    self_config = _as_mapping(self.config_hash)
    other_config = _as_mapping(other.config_hash)

    # A unique sentinel makes "key absent" compare unequal to every real value,
    # including JSON null (None).
    missing = object()
    return [
        key
        for key in sorted(self_config.keys() | other_config.keys())
        if self_config.get(key, missing) != other_config.get(key, missing)
    ]

def _stable_hash(obj: Any) -> str:
"""Return a stable hash for a Python data structure.

We can't just use the md5 of str(obj) as the order of collections
may be random.
def _stable_str(obj: Any) -> str:
"""Return a stable string representation of a Python data structure.

We can't just use str(obj) as the order of collections may be random.
"""
return json.dumps(_json_prep(obj), separators=(',', ':'))


def _json_prep(obj: Any) -> dict[str, Any] | list[Any] | str:
if isinstance(obj, dict):
obj = sorted(map(_stable_hash, obj.items()))
if isinstance(obj, list | tuple | set | frozenset):
obj = sorted(map(_stable_hash, obj))
# convert to a sorted dict
obj = {_json_prep(k): _json_prep(v) for k, v in obj.items()}
obj = {k: obj[k] for k in sorted(obj, key=str)}
elif isinstance(obj, list | tuple | set | frozenset):
# convert to a sorted list
obj = sorted(map(_json_prep, obj), key=str)
elif isinstance(obj, type | types.FunctionType):
# The default repr() of functions includes the ID, which is not ideal.
# We use the fully qualified name instead.
obj = f'{obj.__module__}.{obj.__qualname__}'
return hashlib.md5(str(obj).encode(), usedforsecurity=False).hexdigest()
else:
# we can't do any better, just use the string representation
obj = str(obj)
return obj