jg-rp · jg-rp · May 14, 2024 · May 14, 2024 · May 14, 2024 · May 14, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@
 **Fixes**
 
 - Handle end of query when lexing inside a filter expression.
+- Check patterns passed to `search` and `match` are valid I-Regexp patterns. Both of these functions now return _LogicalFalse_ if the pattern is not valid according to RFC 9485.
 
 ## Version 0.1.1
 

diff --git a/jsonpath_rfc9535/__about__.py b/jsonpath_rfc9535/__about__.py
@@ -1 +1 @@
-__version__ = "0.1.1"
+__version__ = "0.1.2"
diff --git a/jsonpath_rfc9535/environment.py b/jsonpath_rfc9535/environment.py
@@ -217,7 +217,7 @@ def check_well_typedness(
                     )
             elif typ == ExpressionType.LOGICAL:
                 if not isinstance(
-                    arg, (FilterQuery, (LogicalExpression, ComparisonExpression))
+                    arg, (FilterQuery, LogicalExpression, ComparisonExpression)
                 ):
                     raise JSONPathTypeError(
                         f"{token.value}() argument {idx} must be of LogicalType",

diff --git a/jsonpath_rfc9535/filter_expressions.py b/jsonpath_rfc9535/filter_expressions.py
@@ -186,6 +186,7 @@ def __eq__(self, other: object) -> bool:
 
     def evaluate(self, context: FilterContext) -> bool:
         """Evaluate the filter expression in the given _context_."""
+        # TODO: short-circuit eval of right if left is false
         return _compare(
             self.left.evaluate(context), self.operator, self.right.evaluate(context)
         )

diff --git a/jsonpath_rfc9535/function_extensions/_pattern.py b/jsonpath_rfc9535/function_extensions/_pattern.py
@@ -0,0 +1,31 @@
+from typing import List
+
+
+def map_re(pattern: str) -> str:
+    escaped = False
+    char_class = False
+    parts: List[str] = []
+    for ch in pattern:
+        if escaped:
+            parts.append(ch)
+            escaped = False
+            continue
+
+        if ch == ".":
+            if not char_class:
+                parts.append(r"(?:(?![\r\n])\P{Cs}|\p{Cs}\p{Cs})")
+            else:
+                parts.append(ch)
+        elif ch == "\\":
+            escaped = True
+            parts.append(ch)
+        elif ch == "[":
+            char_class = True
+            parts.append(ch)
+        elif ch == "]":
+            char_class = False
+            parts.append(ch)
+        else:
+            parts.append(ch)
+
+    return "".join(parts)
diff --git a/jsonpath_rfc9535/function_extensions/match.py b/jsonpath_rfc9535/function_extensions/match.py
@@ -1,21 +1,27 @@
 """The standard `match` function extension."""
 
 import regex as re
+from iregexp_check import check
 
 from jsonpath_rfc9535.function_extensions import ExpressionType
 from jsonpath_rfc9535.function_extensions import FilterFunction
 
+from ._pattern import map_re
+
 
 class Match(FilterFunction):
     """The standard `match` function."""
 
     arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
     return_type = ExpressionType.LOGICAL
 
-    def __call__(self, string: str, pattern: str) -> bool:
+    def __call__(self, string: str, pattern: object) -> bool:
         """Return `True` if _string_ matches _pattern_, or `False` otherwise."""
+        if not isinstance(pattern, str) or not check(pattern):
+            return False
+
         try:
             # re.fullmatch caches compiled patterns internally
-            return bool(re.fullmatch(pattern, string))
+            return bool(re.fullmatch(map_re(pattern), string))
         except (TypeError, re.error):
             return False
diff --git a/jsonpath_rfc9535/function_extensions/search.py b/jsonpath_rfc9535/function_extensions/search.py
@@ -1,21 +1,27 @@
 """The standard `search` function extension."""
 
 import regex as re
+from iregexp_check import check
 
 from jsonpath_rfc9535.function_extensions import ExpressionType
 from jsonpath_rfc9535.function_extensions import FilterFunction
 
+from ._pattern import map_re
+
 
 class Search(FilterFunction):
     """The standard `search` function."""
 
     arg_types = [ExpressionType.VALUE, ExpressionType.VALUE]
     return_type = ExpressionType.LOGICAL
 
-    def __call__(self, string: str, pattern: str) -> bool:
+    def __call__(self, string: str, pattern: object) -> bool:
         """Return `True` if _string_ contains _pattern_, or `False` otherwise."""
+        if not isinstance(pattern, str) or not check(pattern):
+            return False
+
         try:
             # re.search caches compiled patterns internally
-            return bool(re.search(pattern, string))
+            return bool(re.search(map_re(pattern), string, re.VERSION1))
         except (TypeError, re.error):
             return False
diff --git a/jsonpath_rfc9535/parse.py b/jsonpath_rfc9535/parse.py
@@ -114,6 +114,9 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
             TokenType.TRUE: self.parse_boolean,
         }
 
+        # TODO: can a function argument be a grouped expression?
+        # TODO: can a function argument contain a !?
+
         self.function_argument_map: Dict[
             TokenType, Callable[[TokenStream], Expression]
         ] = {
@@ -412,6 +415,7 @@ def parse_grouped_expression(self, stream: TokenStream) -> Expression:
                 raise JSONPathSyntaxError(
                     "unbalanced parentheses", token=stream.current
                 )
+            # TODO: only if binary op
             expr = self.parse_infix_expression(stream, expr)
 
         stream.expect(TokenType.RPAREN)

diff --git a/jsonpath_rfc9535/query.py b/jsonpath_rfc9535/query.py
@@ -32,7 +32,7 @@ class JSONPathQuery:
         segments: The `JSONPathSegment` instances that make up this query.
     """
 
-    __slots__ = ("env", "fake_root", "segments")
+    __slots__ = ("env", "segments")
 
     def __init__(
         self,

diff --git a/pyproject.toml b/pyproject.toml
@@ -24,7 +24,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["regex"]
+dependencies = ["regex", "iregexp-check>=0.1.3"]
 
 [project.urls]
 Documentation = "https://jg-rp.github.io/python-jsonpath-rfc9535/"

diff --git a/tests/cts b/tests/cts
diff --git a/tests/test_errors.py b/tests/test_errors.py
@@ -97,12 +97,12 @@ class FilterLiteralTestCase(NamedTuple):
     FilterLiteralTestCase("just int", "$[?2]"),
     FilterLiteralTestCase("just float", "$[?2.2]"),
     FilterLiteralTestCase("just null", "$[?null]"),
-    FilterLiteralTestCase("literal and literal", "$[?true and false]"),
-    FilterLiteralTestCase("literal or literal", "$[?true or false]"),
-    FilterLiteralTestCase("comparison and literal", "$[?true == false and false]"),
-    FilterLiteralTestCase("comparison or literal", "$[?true == false or false]"),
-    FilterLiteralTestCase("literal and comparison", "$[?true and true == false]"),
-    FilterLiteralTestCase("literal or comparison", "$[?false or true == false]"),
+    FilterLiteralTestCase("literal and literal", "$[?true && false]"),
+    FilterLiteralTestCase("literal or literal", "$[?true || false]"),
+    FilterLiteralTestCase("comparison and literal", "$[?true == false && false]"),
+    FilterLiteralTestCase("comparison or literal", "$[?true == false || false]"),
+    FilterLiteralTestCase("literal and comparison", "$[?true && true == false]"),
+    FilterLiteralTestCase("literal or comparison", "$[?false || true == false]"),
 ]
 
 

diff --git a/tests/test_iregexp.py b/tests/test_iregexp.py
@@ -0,0 +1,139 @@
+"""I-Regexp checking tests.
+
+Some of these test cases are derived from https://github.com/f3ath/iregexp.
+Thanks go to @f3ath and the project's license is included here.
+
+MIT License
+
+Copyright (c) 2023 Alexey
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import dataclasses
+import operator
+
+import pytest
+from iregexp_check import check
+
+
+@dataclasses.dataclass
+class Case:
+    description: str
+    pattern: str
+
+
+VALID_TEST_CASES = [
+    Case("dot", r"a.b"),
+    Case("char_class_expr", r"[0-9]"),
+    Case("branch", r"foo|bar"),
+    Case("range_quantifier_exact", r"[ab]{3}"),
+    Case("range_quantifier", r"[ab]{3,5}"),
+    Case("range_quantifier_open_ended", r"[ab]{3,}"),
+    Case("char_class_expr_negation", r"[^ab]"),
+    Case("unicode_character_category_letter", r"\p{L}"),
+    Case("unicode_character_category_letter_uppercase", r"\p{Lu}"),
+    Case("unicode_character_category_letter_lowercase", r"\p{Ll}"),
+    Case("unicode_character_category_letter_titlecase", r"\p{Lt}"),
+    Case("unicode_character_category_letter_modifier", r"\p{Lm}"),
+    Case("unicode_character_category_letter_other", r"\p{Lo}"),
+    Case("unicode_character_category_mark_nonspcaing", r"\p{Mn}"),
+    Case("unicode_character_category_mark_spacing_combining", r"\p{Mc}"),
+    Case("unicode_character_category_mark_enclosing", r"\p{Me}"),
+    Case("unicode_character_category_number_decimal_digit", r"\p{Nd}"),
+    Case("unicode_character_category_number_letter", r"\p{Nl}"),
+    Case("unicode_character_category_number_other", r"\p{No}"),
+    Case("unicode_character_category_punctuation_connector", r"\p{Pc}"),
+    Case("unicode_character_category_punctuation_dash", r"\p{Pd}"),
+    Case("unicode_character_category_punctuation_open", r"\p{Ps}"),
+    Case("unicode_character_category_punctuation_close", r"\p{Pe}"),
+    Case("unicode_character_category_punctuation_initial_quote", r"\p{Pi}"),
+    Case("unicode_character_category_punctuation_final_quote", r"\p{Pf}"),
+    Case("unicode_character_category_punctuation_other", r"\p{Po}"),
+    Case("unicode_character_category_symbol_math", r"\p{Sm}"),
+    Case("unicode_character_category_symbol_currency", r"\p{Sc}"),
+    Case("unicode_character_category_symbol_modifier", r"\p{Sk}"),
+    Case("unicode_character_category_symbol_other", r"\p{So}"),
+    Case("unicode_character_category_separator_space", r"\p{Zs}"),
+    Case("unicode_character_category_separator_line", r"\p{Zl}"),
+    Case("unicode_character_category_separator_paragraph", r"\p{Zp}"),
+    Case("unicode_character_category_other_control", r"\p{Cc}"),
+    Case("unicode_character_category_other_format", r"\p{Cf}"),
+    Case("unicode_character_category_other_private_use", r"\p{Co}"),
+    Case("unicode_character_category_other_not_assigned", r"\p{Cn}"),
+    Case("unicode_character_category_inverted_letter", r"\P{L}"),
+    Case("unicode_character_category_inverted_letter_uppercase", r"\P{Lu}"),
+    Case("unicode_character_category_inverted_letter_lowercase", r"\P{Ll}"),
+    Case("unicode_character_category_inverted_letter_titlecase", r"\P{Lt}"),
+    Case("unicode_character_category_inverted_letter_modifier", r"\P{Lm}"),
+    Case("unicode_character_category_inverted_letter_other", r"\P{Lo}"),
+    Case("unicode_character_category_inverted_mark_nonspacing", r"\P{Mn}"),
+    Case("unicode_character_category_inverted_mark_spacing_combining", r"\P{Mc}"),
+    Case("unicode_character_category_inverted_mark_enclosing", r"\P{Me}"),
+    Case("unicode_character_category_inverted_number_decimal_digit", r"\P{Nd}"),
+    Case("unicode_character_category_inverted_number_letter", r"\P{Nl}"),
+    Case("unicode_character_category_inverted_number_other", r"\P{No}"),
+    Case("unicode_character_category_inverted_punctuation_connector", r"\P{Pc}"),
+    Case("unicode_character_category_inverted_punctuation_dash", r"\P{Pd}"),
+    Case("unicode_character_category_inverted_punctuation_open", r"\P{Ps}"),
+    Case("unicode_character_category_inverted_punctuation_close", r"\P{Pe}"),
+    Case("unicode_character_category_inverted_punctuation_initial_quote", r"\P{Pi}"),
+    Case("unicode_character_category_inverted_punctuation_final_quote", r"\P{Pf}"),
+    Case("unicode_character_category_inverted_punctuation_other", r"\P{Po}"),
+    Case("unicode_character_category_inverted_symbol_math", r"\P{Sm}"),
+    Case("unicode_character_category_inverted_symbol_currency", r"\P{Sc}"),
+    Case("unicode_character_category_inverted_symbol_modifier", r"\P{Sk}"),
+    Case("unicode_character_category_inverted_symbol_other", r"\P{So}"),
+    Case("unicode_character_category_inverted_separator_space", r"\P{Zs}"),
+    Case("unicode_character_category_inverted_separator_line", r"\P{Zl}"),
+    Case("unicode_character_category_inverted_separator_paragraph", r"\P{Zp}"),
+    Case("unicode_character_category_inverted_other_control", r"\P{Cc}"),
+    Case("unicode_character_category_inverted_other_format", r"\P{Cf}"),
+    Case("unicode_character_category_inverted_other_private_use", r"\P{Co}"),
+    Case("unicode_character_category_inverted_other_not_assigned", r"\P{Cn}"),
+]
+
+INVALID_TEST_CASES = [
+    Case("named_group", r"(?<group>[a-z]*)"),
+    Case("multi_char_escape", r"\d"),
+    Case("multi_char_escape_class_expr", r"[\S ]"),
+    Case("non_greedy_repetition", r"[0-9]*?"),
+    Case("back_reference", r"(\w)\1"),
+    Case("lookahead", r"(?=.*[a-z])(?=.*[A-Z])(?=.*)[a-zA-Z]{8,}"),
+    Case("lookbehind", r"(?<=[a-z]{4})\[a-z]{2}"),
+    Case("non_capturing_group", r"(?:[a-z]+)"),
+    Case("atomic_group", r"(?>[a-z]+)"),
+    Case("conditional_group", r"(?(1)a|b)"),
+    Case("comment", r"(?#comment)"),
+    Case("flag", r"(?i)[a-z]+"),
+]
+
+
+@pytest.mark.parametrize(
+    "case", VALID_TEST_CASES, ids=operator.attrgetter("description")
+)
+def test_valid_iregexp(case: Case) -> None:
+    assert check(case.pattern)
+
+
+@pytest.mark.parametrize(
+    "case", INVALID_TEST_CASES, ids=operator.attrgetter("description")
+)
+def test_invalid_iregexp(case: Case) -> None:
+    assert not check(case.pattern)
diff --git a/tests/test_parse.py b/tests/test_parse.py
@@ -71,11 +71,6 @@ class Case:
         query="$.some[?(@.thing >= 7)]",
         want="$['some'][?@['thing'] >= 7]",
     ),
-    Case(
-        description="filter with >=",
-        query="$.some[?(@.thing >= 7)]",
-        want="$['some'][?@['thing'] >= 7]",
-    ),
     Case(
         description="filter with !=",
         query="$.some[?(@.thing != 7)]",
+406 −0		cts.json
+78 −0		tests/filter.json
+54 −0		tests/functions/match.json
+42 −0		tests/functions/search.json