diff --git a/docs/source/history.rst b/docs/source/history.rst index 8249907..2ad6802 100644 --- a/docs/source/history.rst +++ b/docs/source/history.rst @@ -35,6 +35,8 @@ Bug Fixes - `#229 `__ Gracefully handle if git is not installed +- `#227 `__ Use pypi.org's + JSON API instead of XML-RPC. 7.7.0 ===== diff --git a/pyproject.toml b/pyproject.toml index d141dea..2934b1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ classifiers = [ requires-python = ">=3.10" -dependencies = ["PyEnchant>=3.1.1", "Sphinx>=3.0.0"] +dependencies = ["PyEnchant>=3.1.1", "Sphinx>=3.0.0", "requests>=2.32.3"] [project.optional-dependencies] test = ["pytest", "pytest-cov", "coverage!=4.4,>=4.0"] diff --git a/sphinxcontrib/spelling/filters.py b/sphinxcontrib/spelling/filters.py index 60d9024..44d6945 100644 --- a/sphinxcontrib/spelling/filters.py +++ b/sphinxcontrib/spelling/filters.py @@ -1,8 +1,7 @@ # # Copyright (c) 2010 Doug Hellmann. All rights reserved. # -"""Spelling checker extension for Sphinx. -""" +"""Spelling checker extension for Sphinx.""" # TODO - Words with multiple uppercase letters treated as classes and ignored @@ -10,8 +9,8 @@ import importlib import subprocess import sys -from xmlrpc import client as xmlrpc_client +import requests from enchant.tokenize import Filter, get_tokenizer, tokenize, unit_tokenize from sphinx.util import logging @@ -22,18 +21,19 @@ class AcronymFilter(Filter): """If a word looks like an acronym (all upper case letters), ignore it. """ + def _skip(self, word): return ( - word.isupper() or # all caps + word.isupper() # all caps + or # pluralized acronym ("URLs") - (word[-1].lower() == 's' and word[:-1].isupper()) + (word[-1].lower() == "s" and word[:-1].isupper()) ) class list_tokenize(tokenize): - def __init__(self, words): - super().__init__('') + super().__init__("") self._words = words def next(self): @@ -44,8 +44,8 @@ def next(self): class ContractionFilter(Filter): - """Strip common contractions from words. - """ + """Strip common contractions from words.""" + splits = { "aren't": ["are", "not"], "can't": ["can", "not"], @@ -138,8 +138,7 @@ def _split(self, word): class IgnoreWordsFilter(Filter): - """Given a set of words, ignore them all. - """ + """Given a set of words, ignore them all.""" def __init__(self, tokenizer, word_set): self.word_set = set(word_set) @@ -150,7 +149,6 @@ def _skip(self, word): class IgnoreWordsFilterFactory: - def __init__(self, words): self.words = words @@ -159,23 +157,31 @@ def __call__(self, tokenizer): class PyPIFilterFactory(IgnoreWordsFilterFactory): - """Build an IgnoreWordsFilter for all of the names of packages on PyPI. - """ + """Build an IgnoreWordsFilter for all of the names of packages on PyPI.""" + def __init__(self): - client = xmlrpc_client.ServerProxy('https://pypi.python.org/pypi') - super().__init__(client.list_packages()) + r = requests.get( + "https://pypi.org/simple/", + headers={ + "user-agent": "sphinxcontrib.spelling", + "accept": "application/vnd.pypi.simple.v1+json", + }, + ) + names = [i["name"] for i in r.json()["projects"]] + logger.debug("retrieved %d project names from pypi.org", len(names)) + super().__init__(names) class PythonBuiltinsFilter(Filter): - """Ignore names of built-in Python symbols. - """ + """Ignore names of built-in Python symbols.""" + def _skip(self, word): return hasattr(builtins, word) class ImportableModuleFilter(Filter): - """Ignore names of modules that we could import. - """ + """Ignore names of modules that we could import.""" + def __init__(self, tokenizer): super().__init__(tokenizer) self.found_modules = set(sys.builtin_module_names) @@ -185,7 +191,7 @@ def __init__(self, tokenizer): # valid module, which is consistent with the behavior before # version 7.3.1. See # https://github.com/sphinx-contrib/spelling/issues/141 - self.sought_modules.add('__main__') + self.sought_modules.add("__main__") def _skip(self, word): # If the word looks like a python module filename, strip the @@ -195,13 +201,13 @@ def _skip(self, word): # it look like Sphinx is complaining about a commandline # argument. See # https://github.com/sphinx-contrib/spelling/issues/142 - if word.endswith('.py'): + if word.endswith(".py"): logger.debug( - 'removing .py extension from %r before searching for module', - word) + "removing .py extension from %r before searching for module", word + ) word = word[:-3] - valid_module_name = all(n.isidentifier() for n in word.split('.')) + valid_module_name = all(n.isidentifier() for n in word.split(".")) if not valid_module_name: return False @@ -214,8 +220,7 @@ def _skip(self, word): # error out of distutils, or something else triggered # by failing to be able to import a parent package to # use the metadata to search for a subpackage. - logger.debug('find_spec(%r) failed, invalid module name: %s', - word, err) + logger.debug("find_spec(%r) failed, invalid module name: %s", word, err) else: if mod is not None: self.found_modules.add(word) @@ -230,25 +235,28 @@ class ContributorFilter(IgnoreWordsFilter): tokens that are in the set. """ - _pretty_format = ( - '%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn' - ) + _pretty_format = "%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn" def __init__(self, tokenizer): contributors = self._get_contributors() super().__init__(tokenizer, contributors) def _get_contributors(self): - logger.info('Scanning contributors') - cmd = ['git', 'log', '--quiet', '--no-color', - f'--pretty=format:{self._pretty_format}'] + logger.info("Scanning contributors") + cmd = [ + "git", + "log", + "--quiet", + "--no-color", + f"--pretty=format:{self._pretty_format}", + ] try: p = subprocess.run(cmd, check=True, stdout=subprocess.PIPE) except (subprocess.CalledProcessError, FileNotFoundError) as err: - logger.warning('Called: %s', ' '.join(cmd)) - logger.warning('Failed to scan contributors: %s', err) + logger.warning("Called: %s", " ".join(cmd)) + logger.warning("Failed to scan contributors: %s", err) return set() - output = p.stdout.decode('utf-8') - tokenizer = get_tokenizer('en_US', filters=[]) + output = p.stdout.decode("utf-8") + tokenizer = get_tokenizer("en_US", filters=[]) return {word for word, pos in tokenizer(output)} diff --git a/tests/test_filter.py b/tests/test_filter.py index 3601f85..5b4bdf2 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,8 +1,7 @@ # # Copyright (c) 2010 Doug Hellmann. All rights reserved. # -"""Tests for filters. -""" +"""Tests for filters.""" import contextlib import logging @@ -12,38 +11,38 @@ import pytest from enchant.tokenize import get_tokenizer -from sphinxcontrib.spelling import filters # isort:skip -from tests import helpers # isort:skip +from sphinxcontrib.spelling import filters # isort:skip +from tests import helpers # isort:skip # Replace the sphinx logger with a normal one so pytest can collect # the output. -filters.logger = logging.getLogger('test.filters') +filters.logger = logging.getLogger("test.filters") def test_builtin_unicode(): f = filters.PythonBuiltinsFilter(None) - assert not f._skip('passé') + assert not f._skip("passé") def test_builtin_regular(): f = filters.PythonBuiltinsFilter(None) - assert f._skip('print') + assert f._skip("print") def test_acronym(): - text = 'a front-end for DBM-style databases' - t = get_tokenizer('en_US', []) + text = "a front-end for DBM-style databases" + t = get_tokenizer("en_US", []) f = filters.AcronymFilter(t) words = [w[0] for w in f(text)] - assert 'DBM' not in words, 'Failed to filter out acronym' + assert "DBM" not in words, "Failed to filter out acronym" def test_acronym_unicode(): - text = 'a front-end for DBM-style databases' - t = get_tokenizer('en_US', []) + text = "a front-end for DBM-style databases" + t = get_tokenizer("en_US", []) f = filters.AcronymFilter(t) words = [w[0] for w in f(text)] - assert 'DBM' not in words, 'Failed to filter out acronym' + assert "DBM" not in words, "Failed to filter out acronym" @helpers.require_git_repo @@ -77,7 +76,7 @@ def test_acronym_unicode(): "Timotheus", "Tobias", "Tricoli", - ] + ], ) def test_contributors(name): f = filters.ContributorFilter(None) @@ -87,11 +86,11 @@ def test_contributors(name): @pytest.mark.parametrize( "word,expected", [ - ('os', True), - ('os.name', False), - ('__main__', False), + ("os", True), + ("os.name", False), + ("__main__", False), ("don't", False), - ] + ], ) def test_importable_module_skip(word, expected): f = filters.ImportableModuleFilter(None) @@ -110,42 +109,45 @@ def import_path(new_path): def test_importable_module_with_side_effets(tmpdir): - logging.debug('tmpdir %r', tmpdir) - logging.debug('cwd %r', os.getcwd()) + logging.debug("tmpdir %r", tmpdir) + logging.debug("cwd %r", os.getcwd()) - parentdir = tmpdir.join('parent') + parentdir = tmpdir.join("parent") parentdir.mkdir() - parentdir.join('__init__.py').write( - 'raise SystemExit("exit as side-effect")\n' - ) - parentdir.join('child.py').write('') + parentdir.join("__init__.py").write('raise SystemExit("exit as side-effect")\n') + parentdir.join("child.py").write("") with import_path([str(tmpdir)] + sys.path): f = filters.ImportableModuleFilter(None) - skip_parent = f._skip('parent') - skip_both = f._skip('parent.child') + skip_parent = f._skip("parent") + skip_both = f._skip("parent.child") # The parent module name is valid because it is not imported, only # discovered. assert skip_parent is True - assert 'parent' in f.found_modules + assert "parent" in f.found_modules # The child module name is not valid because the parent is # imported to find the child and that triggers the side-effect. assert skip_both is False - assert 'parent.child' not in f.found_modules + assert "parent.child" not in f.found_modules def test_importable_module_with_system_exit(tmpdir): - path = tmpdir.join('mytestmodule.py') + path = tmpdir.join("mytestmodule.py") path.write('raise SystemExit("exit as side-effect")\n') with import_path([str(tmpdir)] + sys.path): f = filters.ImportableModuleFilter(None) - skip = f._skip('mytestmodule') + skip = f._skip("mytestmodule") # The filter does not actually import the module in this case, so # it shows up as a valid word. assert skip is True - assert 'mytestmodule' in f.found_modules + assert "mytestmodule" in f.found_modules + + +def test_pypi_filter_factory(): + f = filters.PyPIFilterFactory() + assert "sphinxcontrib-spelling" in f.words