Skip to content

Commit

Permalink
Merge pull request #287 from emarondan/adding-pre-commit
Browse files Browse the repository at this point in the history
Add pre-commit
  • Loading branch information
Gallaecio authored Feb 23, 2024
2 parents 0475327 + 047282b commit 618ad9f
Show file tree
Hide file tree
Showing 20 changed files with 124 additions and 272 deletions.
2 changes: 2 additions & 0 deletions .bandit.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
skips:
- B101
- B311
- B320
- B410
exclude_dirs: ['tests']
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ per-file-ignores =
setup.py:E501
tests/test_selector.py:E501
tests/test_selector_csstranslator.py:E501
tests/test_selector_jmespath.py:E501
tests/test_utils.py:E501
tests/test_xpathfuncs.py:E501
tests/typing/*.py:E,F
2 changes: 2 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# applying pre-commit hooks to the project
a57c23e3b7be0f001595bd8767fe05e40a66e730
8 changes: 1 addition & 7 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,7 @@ jobs:
include:
- python-version: "3.12"
env:
TOXENV: security
- python-version: "3.12"
env:
TOXENV: flake8
TOXENV: pre-commit
- python-version: "3.12"
env:
TOXENV: pylint
Expand All @@ -23,9 +20,6 @@ jobs:
- python-version: "3.12"
env:
TOXENV: typing
- python-version: "3.12"
env:
TOXENV: black
- python-version: "3.12"
env:
TOXENV: twinecheck
Expand Down
2 changes: 2 additions & 0 deletions .isort.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[settings]
profile = black
18 changes: 18 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
repos:
- repo: https://github.com/PyCQA/bandit
rev: 1.7.7
hooks:
- id: bandit
args: [-r, -c, .bandit.yml]
- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
hooks:
- id: flake8
- repo: https://github.com/psf/black.git
rev: 24.1.1
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.13.2
hooks:
- id: isort
7 changes: 2 additions & 5 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
import sys


# Get the project root dir, which is the parent dir of the current working directory
cwd = os.getcwd()
project_root = os.path.dirname(cwd)
Expand All @@ -13,8 +12,7 @@
# version is used.
sys.path.insert(0, project_root)

import parsel

import parsel # noqa: E402

# -- General configuration ---------------------------------------------

Expand Down Expand Up @@ -98,10 +96,9 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
("index", "parsel", "Parsel Documentation", ["Scrapy Project"], 1)
("index", "parsel", "Parsel Documentation", ["Scrapy Project"], 1),
]


# -- Options for Texinfo output ----------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
Expand Down
4 changes: 2 additions & 2 deletions parsel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
"xpathfuncs",
]

from parsel.selector import Selector, SelectorList # NOQA
from parsel.csstranslator import css2xpath # NOQA
from parsel import xpathfuncs # NOQA
from parsel.csstranslator import css2xpath # NOQA
from parsel.selector import Selector, SelectorList # NOQA

xpathfuncs.setup()
33 changes: 11 additions & 22 deletions parsel/csstranslator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@

from cssselect import GenericTranslator as OriginalGenericTranslator
from cssselect import HTMLTranslator as OriginalHTMLTranslator
from cssselect.xpath import XPathExpr as OriginalXPathExpr
from cssselect.xpath import ExpressionError
from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement

from cssselect.xpath import ExpressionError
from cssselect.xpath import XPathExpr as OriginalXPathExpr

if TYPE_CHECKING:
# typing.Self requires Python 3.11
Expand All @@ -25,9 +24,7 @@ def from_xpath(
textnode: bool = False,
attribute: Optional[str] = None,
) -> "Self":
x = cls(
path=xpath.path, element=xpath.element, condition=xpath.condition
)
x = cls(path=xpath.path, element=xpath.element, condition=xpath.condition)
x.textnode = textnode
x.attribute = attribute
return x
Expand Down Expand Up @@ -82,9 +79,7 @@ class TranslatorMixin:
Currently supported pseudo-elements are ``::text`` and ``::attr(ATTR_NAME)``.
"""

def xpath_element(
self: TranslatorProtocol, selector: Element
) -> XPathExpr:
def xpath_element(self: TranslatorProtocol, selector: Element) -> XPathExpr:
# https://github.com/python/mypy/issues/12344
xpath = super().xpath_element(selector) # type: ignore[safe-super]
return XPathExpr.from_xpath(xpath)
Expand All @@ -104,7 +99,9 @@ def xpath_pseudo_element(
)
xpath = method(xpath, pseudo_element)
else:
method_name = f"xpath_{pseudo_element.replace('-', '_')}_simple_pseudo_element"
method_name = (
f"xpath_{pseudo_element.replace('-', '_')}_simple_pseudo_element"
)
method = getattr(self, method_name, None)
if not method:
raise ExpressionError(
Expand All @@ -121,30 +118,22 @@ def xpath_attr_functional_pseudo_element(
raise ExpressionError(
f"Expected a single string or ident for ::attr(), got {function.arguments!r}" # noqa: E231
)
return XPathExpr.from_xpath(
xpath, attribute=function.arguments[0].value
)
return XPathExpr.from_xpath(xpath, attribute=function.arguments[0].value)

def xpath_text_simple_pseudo_element(
self, xpath: OriginalXPathExpr
) -> XPathExpr:
def xpath_text_simple_pseudo_element(self, xpath: OriginalXPathExpr) -> XPathExpr:
"""Support selecting text nodes using ::text pseudo-element"""
return XPathExpr.from_xpath(xpath, textnode=True)


class GenericTranslator(TranslatorMixin, OriginalGenericTranslator):
@lru_cache(maxsize=256)
def css_to_xpath(
self, css: str, prefix: str = "descendant-or-self::"
) -> str:
def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
return super().css_to_xpath(css, prefix)


class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator):
@lru_cache(maxsize=256)
def css_to_xpath(
self, css: str, prefix: str = "descendant-or-self::"
) -> str:
def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
return super().css_to_xpath(css, prefix)


Expand Down
41 changes: 10 additions & 31 deletions parsel/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from .csstranslator import GenericTranslator, HTMLTranslator
from .utils import extract_regex, flatten, iflatten, shorten


_SelectorType = TypeVar("_SelectorType", bound="Selector")
_ParserType = Union[etree.XMLParser, etree.HTMLParser]
# simplified _OutputMethodArg from types-lxml
Expand Down Expand Up @@ -135,18 +134,14 @@ def __getitem__(
) -> Union[_SelectorType, "SelectorList[_SelectorType]"]:
o = super().__getitem__(pos)
if isinstance(pos, slice):
return self.__class__(
typing.cast("SelectorList[_SelectorType]", o)
)
return self.__class__(typing.cast("SelectorList[_SelectorType]", o))
else:
return typing.cast(_SelectorType, o)

def __getstate__(self) -> None:
raise TypeError("can't pickle SelectorList objects")

def jmespath(
self, query: str, **kwargs: Any
) -> "SelectorList[_SelectorType]":
def jmespath(self, query: str, **kwargs: Any) -> "SelectorList[_SelectorType]":
"""
Call the ``.jmespath()`` method for each element in this list and return
their results flattened as another :class:`SelectorList`.
Expand All @@ -158,9 +153,7 @@ def jmespath(
selector.jmespath('author.name', options=jmespath.Options(dict_cls=collections.OrderedDict))
"""
return self.__class__(
flatten([x.jmespath(query, **kwargs) for x in self])
)
return self.__class__(flatten([x.jmespath(query, **kwargs) for x in self]))

def xpath(
self,
Expand All @@ -185,9 +178,7 @@ def xpath(
selector.xpath('//a[href=$url]', url="http://www.example.com")
"""
return self.__class__(
flatten(
[x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]
)
flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self])
)

def css(self, query: str) -> "SelectorList[_SelectorType]":
Expand All @@ -211,9 +202,7 @@ def re(
Passing ``replace_entities`` as ``False`` switches off these
replacements.
"""
return flatten(
[x.re(regex, replace_entities=replace_entities) for x in self]
)
return flatten([x.re(regex, replace_entities=replace_entities) for x in self])

@typing.overload
def re_first(
Expand Down Expand Up @@ -316,9 +305,7 @@ def drop(self) -> None:
_NOT_SET = object()


def _get_root_from_text(
text: str, *, type: str, **lxml_kwargs: Any
) -> etree._Element:
def _get_root_from_text(text: str, *, type: str, **lxml_kwargs: Any) -> etree._Element:
return create_root_node(text, _ctgroup[type]["_parser"], **lxml_kwargs)


Expand Down Expand Up @@ -583,9 +570,7 @@ def make_selector(x: Any) -> _SelectorType: # closure function
return self.__class__(root=x, _expr=query)

result = [make_selector(x) for x in result]
return typing.cast(
SelectorList[_SelectorType], self.selectorlist_cls(result)
)
return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result))

def xpath(
self: _SelectorType,
Expand All @@ -611,9 +596,7 @@ def xpath(
selector.xpath('//a[href=$url]', url="http://www.example.com")
"""
if self.type not in ("html", "xml", "text"):
raise ValueError(
f"Cannot use xpath on a Selector of type {self.type!r}"
)
raise ValueError(f"Cannot use xpath on a Selector of type {self.type!r}")
if self.type in ("html", "xml"):
try:
xpathev = self.root.xpath
Expand Down Expand Up @@ -654,9 +637,7 @@ def xpath(
)
for x in result
]
return typing.cast(
SelectorList[_SelectorType], self.selectorlist_cls(result)
)
return typing.cast(SelectorList[_SelectorType], self.selectorlist_cls(result))

def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
"""
Expand All @@ -670,9 +651,7 @@ def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
.. _cssselect: https://pypi.python.org/pypi/cssselect/
"""
if self.type not in ("html", "xml", "text"):
raise ValueError(
f"Cannot use css on a Selector of type {self.type!r}"
)
raise ValueError(f"Cannot use css on a Selector of type {self.type!r}")
return self.xpath(self._css2xpath(query))

def _css2xpath(self, query: str) -> str:
Expand Down
1 change: 1 addition & 0 deletions parsel/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
from typing import Any, Iterable, Iterator, List, Match, Pattern, Union, cast

from w3lib.html import replace_entities as w3lib_replace_entities


Expand Down
10 changes: 2 additions & 8 deletions parsel/xpathfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@
from typing import Any, Callable, Optional

from lxml import etree

from w3lib.html import HTML5_WHITESPACE


regex = f"[{HTML5_WHITESPACE}]+"
replace_html5_whitespaces = re.compile(regex).sub

Expand Down Expand Up @@ -43,14 +41,10 @@ def has_class(context: Any, *classes: str) -> bool:
"""
if not context.eval_context.get("args_checked"):
if not classes:
raise ValueError(
"XPath error: has-class must have at least 1 argument"
)
raise ValueError("XPath error: has-class must have at least 1 argument")
for c in classes:
if not isinstance(c, str):
raise ValueError(
"XPath error: has-class arguments must be strings"
)
raise ValueError("XPath error: has-class arguments must be strings")
context.eval_context["args_checked"] = True

node_cls = context.context_node.get("class")
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from setuptools import setup


with open("README.rst", encoding="utf-8") as readme_file:
readme = readme_file.read()

Expand Down
Loading

0 comments on commit 618ad9f

Please sign in to comment.