Merge pull request #287 from emarondan/adding-pre-commit

Add pre-commit
scrapy · Feb 23, 2024 · 618ad9f
2 parents 0475327 + 047282b
commit 618ad9f
Show file tree

Hide file tree

Showing 20 changed files with 124 additions and 272 deletions.
diff --git a/.bandit.yml b/.bandit.yml
@@ -1,4 +1,6 @@
 skips:
 - B101
+- B311
 - B320
 - B410
+exclude_dirs: ['tests']
diff --git a/.flake8 b/.flake8
@@ -9,6 +9,7 @@ per-file-ignores =
     setup.py:E501
     tests/test_selector.py:E501
     tests/test_selector_csstranslator.py:E501
+    tests/test_selector_jmespath.py:E501
     tests/test_utils.py:E501
     tests/test_xpathfuncs.py:E501
     tests/typing/*.py:E,F
diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
@@ -0,0 +1,2 @@
+# applying pre-commit hooks to the project
+a57c23e3b7be0f001595bd8767fe05e40a66e730
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -10,10 +10,7 @@ jobs:
         include:
         - python-version: "3.12"
           env:
-            TOXENV: security
-        - python-version: "3.12"
-          env:
-            TOXENV: flake8
+            TOXENV: pre-commit
         - python-version: "3.12"
           env:
             TOXENV: pylint
@@ -23,9 +20,6 @@ jobs:
         - python-version: "3.12"
           env:
             TOXENV: typing
-        - python-version: "3.12"
-          env:
-            TOXENV: black
         - python-version: "3.12"
           env:
             TOXENV: twinecheck

diff --git a/.isort.cfg b/.isort.cfg
@@ -0,0 +1,2 @@
+[settings]
+profile = black
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+repos:
+- repo: https://github.com/PyCQA/bandit
+  rev: 1.7.7
+  hooks:
+  - id: bandit
+    args: [-r, -c, .bandit.yml]
+- repo: https://github.com/PyCQA/flake8
+  rev: 7.0.0
+  hooks:
+  - id: flake8
+- repo: https://github.com/psf/black.git
+  rev: 24.1.1
+  hooks:
+  - id: black
+- repo: https://github.com/pycqa/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
diff --git a/docs/conf.py b/docs/conf.py
@@ -3,7 +3,6 @@
 import os
 import sys
 
-
 # Get the project root dir, which is the parent dir of this
 cwd = os.getcwd()
 project_root = os.path.dirname(cwd)
@@ -13,8 +12,7 @@
 # version is used.
 sys.path.insert(0, project_root)
 
-import parsel
-
+import parsel  # noqa: E402
 
 # -- General configuration ---------------------------------------------
 
@@ -98,10 +96,9 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ("index", "parsel", "Parsel Documentation", ["Scrapy Project"], 1)
+    ("index", "parsel", "Parsel Documentation", ["Scrapy Project"], 1),
 ]
 
-
 # -- Options for Texinfo output ----------------------------------------
 
 # Grouping the document tree into Texinfo files. List of tuples

diff --git a/parsel/__init__.py b/parsel/__init__.py
@@ -13,8 +13,8 @@
     "xpathfuncs",
 ]
 
-from parsel.selector import Selector, SelectorList  # NOQA
-from parsel.csstranslator import css2xpath  # NOQA
 from parsel import xpathfuncs  # NOQA
+from parsel.csstranslator import css2xpath  # NOQA
+from parsel.selector import Selector, SelectorList  # NOQA
 
 xpathfuncs.setup()
diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py
@@ -3,10 +3,9 @@
 
 from cssselect import GenericTranslator as OriginalGenericTranslator
 from cssselect import HTMLTranslator as OriginalHTMLTranslator
-from cssselect.xpath import XPathExpr as OriginalXPathExpr
-from cssselect.xpath import ExpressionError
 from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement
-
+from cssselect.xpath import ExpressionError
+from cssselect.xpath import XPathExpr as OriginalXPathExpr
 
 if TYPE_CHECKING:
     # typing.Self requires Python 3.11
@@ -25,9 +24,7 @@ def from_xpath(
         textnode: bool = False,
         attribute: Optional[str] = None,
     ) -> "Self":
-        x = cls(
-            path=xpath.path, element=xpath.element, condition=xpath.condition
-        )
+        x = cls(path=xpath.path, element=xpath.element, condition=xpath.condition)
         x.textnode = textnode
         x.attribute = attribute
         return x
@@ -82,9 +79,7 @@ class TranslatorMixin:
     Currently supported pseudo-elements are ``::text`` and ``::attr(ATTR_NAME)``.
     """
 
-    def xpath_element(
-        self: TranslatorProtocol, selector: Element
-    ) -> XPathExpr:
+    def xpath_element(self: TranslatorProtocol, selector: Element) -> XPathExpr:
         # https://github.com/python/mypy/issues/12344
         xpath = super().xpath_element(selector)  # type: ignore[safe-super]
         return XPathExpr.from_xpath(xpath)
@@ -104,7 +99,9 @@ def xpath_pseudo_element(
                 )
             xpath = method(xpath, pseudo_element)
         else:
-            method_name = f"xpath_{pseudo_element.replace('-', '_')}_simple_pseudo_element"
+            method_name = (
+                f"xpath_{pseudo_element.replace('-', '_')}_simple_pseudo_element"
+            )
             method = getattr(self, method_name, None)
             if not method:
                 raise ExpressionError(
@@ -121,30 +118,22 @@ def xpath_attr_functional_pseudo_element(
             raise ExpressionError(
                 f"Expected a single string or ident for ::attr(), got {function.arguments!r}"  # noqa: E231
             )
-        return XPathExpr.from_xpath(
-            xpath, attribute=function.arguments[0].value
-        )
+        return XPathExpr.from_xpath(xpath, attribute=function.arguments[0].value)
 
-    def xpath_text_simple_pseudo_element(
-        self, xpath: OriginalXPathExpr
-    ) -> XPathExpr:
+    def xpath_text_simple_pseudo_element(self, xpath: OriginalXPathExpr) -> XPathExpr:
         """Support selecting text nodes using ::text pseudo-element"""
         return XPathExpr.from_xpath(xpath, textnode=True)
 
 
 class GenericTranslator(TranslatorMixin, OriginalGenericTranslator):
     @lru_cache(maxsize=256)
-    def css_to_xpath(
-        self, css: str, prefix: str = "descendant-or-self::"
-    ) -> str:
+    def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
         return super().css_to_xpath(css, prefix)
 
 
 class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
     @lru_cache(maxsize=256)
-    def css_to_xpath(
-        self, css: str, prefix: str = "descendant-or-self::"
-    ) -> str:
+    def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
         return super().css_to_xpath(css, prefix)
 
 

diff --git a/parsel/selector.py b/parsel/selector.py
@@ -29,7 +29,6 @@
 from .csstranslator import GenericTranslator, HTMLTranslator
 from .utils import extract_regex, flatten, iflatten, shorten
 
-
 _SelectorType = TypeVar("_SelectorType", bound="Selector")
 _ParserType = Union[etree.XMLParser, etree.HTMLParser]
 # simplified _OutputMethodArg from types-lxml
@@ -135,18 +134,14 @@ def __getitem__(
     ) -> Union[_SelectorType, "SelectorList[_SelectorType]"]:
         o = super().__getitem__(pos)
         if isinstance(pos, slice):
-            return self.__class__(
-                typing.cast("SelectorList[_SelectorType]", o)
-            )
+            return self.__class__(typing.cast("SelectorList[_SelectorType]", o))
         else:
             return typing.cast(_SelectorType, o)
 
     def __getstate__(self) -> None:
         raise TypeError("can't pickle SelectorList objects")
 
-    def jmespath(
-        self, query: str, **kwargs: Any
-    ) -> "SelectorList[_SelectorType]":
+    def jmespath(self, query: str, **kwargs: Any) -> "SelectorList[_SelectorType]":
         """
         Call the ``.jmespath()`` method for each element in this list and return
         their results flattened as another :class:`SelectorList`.
@@ -158,9 +153,7 @@ def jmespath(
 
             selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict))
         """
-        return self.__class__(
-            flatten([x.jmespath(query, **kwargs) for x in self])
-        )
+        return self.__class__(flatten([x.jmespath(query, **kwargs) for x in self]))
 
     def xpath(
         self,
@@ -185,9 +178,7 @@ def xpath(
             selector.xpath('//a[href=$url]', url="http://www.example.com")
         """
         return self.__class__(
-            flatten(
-                [x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]
-            )
+            flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self])
         )
 
     def css(self, query: str) -> "SelectorList[_SelectorType]":
@@ -211,9 +202,7 @@ def re(
         Passing ``replace_entities`` as ``False`` switches off these
         replacements.
         """
-        return flatten(
-            [x.re(regex, replace_entities=replace_entities) for x in self]
-        )
+        return flatten([x.re(regex, replace_entities=replace_entities) for x in self])
 
     @typing.overload
     def re_first(
@@ -316,9 +305,7 @@ def drop(self) -> None:
 _NOT_SET = object()
 
 
-def _get_root_from_text(
-    text: str, *, type: str, **lxml_kwargs: Any
-) -> etree._Element:
+def _get_root_from_text(text: str, *, type: str, **lxml_kwargs: Any) -> etree._Element:
     return create_root_node(text, _ctgroup[type]["_parser"], **lxml_kwargs)
 
 
@@ -583,9 +570,7 @@ def make_selector(x: Any) -> _SelectorType:  # closure function
                 return self.__class__(root=x, _expr=query)
 
         result = [make_selector(x) for x in result]
-        return typing.cast(
-            SelectorList[_SelectorType], self.selectorlist_cls(result)
-        )
+        return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result))
 
     def xpath(
         self: _SelectorType,
@@ -611,9 +596,7 @@ def xpath(
             selector.xpath('//a[href=$url]', url="http://www.example.com")
         """
         if self.type not in ("html", "xml", "text"):
-            raise ValueError(
-                f"Cannot use xpath on a Selector of type {self.type!r}"
-            )
+            raise ValueError(f"Cannot use xpath on a Selector of type {self.type!r}")
         if self.type in ("html", "xml"):
             try:
                 xpathev = self.root.xpath
@@ -654,9 +637,7 @@ def xpath(
             )
             for x in result
         ]
-        return typing.cast(
-            SelectorList[_SelectorType], self.selectorlist_cls(result)
-        )
+        return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result))
 
     def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
         """
@@ -670,9 +651,7 @@ def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
         .. _cssselect: https://pypi.python.org/pypi/cssselect/
         """
         if self.type not in ("html", "xml", "text"):
-            raise ValueError(
-                f"Cannot use css on a Selector of type {self.type!r}"
-            )
+            raise ValueError(f"Cannot use css on a Selector of type {self.type!r}")
         return self.xpath(self._css2xpath(query))
 
     def _css2xpath(self, query: str) -> str:

diff --git a/parsel/utils.py b/parsel/utils.py
@@ -1,5 +1,6 @@
 import re
 from typing import Any, Iterable, Iterator, List, Match, Pattern, Union, cast
+
 from w3lib.html import replace_entities as w3lib_replace_entities
 
 

diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
@@ -2,10 +2,8 @@
 from typing import Any, Callable, Optional
 
 from lxml import etree
-
 from w3lib.html import HTML5_WHITESPACE
 
-
 regex = f"[{HTML5_WHITESPACE}]+"
 replace_html5_whitespaces = re.compile(regex).sub
 
@@ -43,14 +41,10 @@ def has_class(context: Any, *classes: str) -> bool:
     """
     if not context.eval_context.get("args_checked"):
         if not classes:
-            raise ValueError(
-                "XPath error: has-class must have at least 1 argument"
-            )
+            raise ValueError("XPath error: has-class must have at least 1 argument")
         for c in classes:
             if not isinstance(c, str):
-                raise ValueError(
-                    "XPath error: has-class arguments must be strings"
-                )
+                raise ValueError("XPath error: has-class arguments must be strings")
         context.eval_context["args_checked"] = True
 
     node_cls = context.context_node.get("class")

diff --git a/setup.py b/setup.py
@@ -2,7 +2,6 @@
 
 from setuptools import setup
 
-
 with open("README.rst", encoding="utf-8") as readme_file:
     readme = readme_file.read()