From 9cfc06aa78e0e71af80f7226085c2f64d2303f2d Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 23 Oct 2023 12:11:25 +0200 Subject: [PATCH 01/24] Introduce filter `Chain` helper methods --- internal/filter/types.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/internal/filter/types.go b/internal/filter/types.go index 4c1104d0..36bef5e1 100644 --- a/internal/filter/types.go +++ b/internal/filter/types.go @@ -80,6 +80,31 @@ func (c *Chain) ExtractConditions() []*Condition { return conditions } +// pop pops the last filter from the rules slice (if not empty) and returns it. +func (c *Chain) pop() Filter { + var rule Filter + if l := len(c.rules); l > 0 { + rule, c.rules = c.rules[l-1], c.rules[:l-1] + } + + return rule +} + +// top picks and erases the first element from its rules and returns it. +func (c *Chain) top() Filter { + var rule Filter + if len(c.rules) > 0 { + rule, c.rules = c.rules[0], c.rules[1:] + } + + return rule +} + +// add adds the given filter rules to the current chain. +func (c *Chain) add(rules ...Filter) { + c.rules = append(c.rules, rules...) +} + // CompOperator is a type used for grouping the individual comparison operators of a filter string. 
type CompOperator string From 7847585caf4439a8626cfe1eb8a75adc029637c6 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Thu, 26 Oct 2023 10:58:29 +0200 Subject: [PATCH 02/24] tests: Use `~` as an operator for wildcald matching --- internal/filter/parser_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index ca386f67..eeb4aa33 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -70,12 +70,12 @@ func TestFilter(t *testing.T) { expected = &Condition{op: UnEqual, column: "foo", value: "bar"} assert.Equal(t, expected, rule) - rule, err = Parse("foo=bar*") + rule, err = Parse("foo~bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Condition{op: Like, column: "foo", value: "bar*"} assert.Equal(t, expected, rule) - rule, err = Parse("foo!=bar*") + rule, err = Parse("foo!~bar*") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Condition{op: UnLike, column: "foo", value: "bar*"} assert.Equal(t, expected, rule) From a45bec48f95424229560fab2b13a8917fbc6b1de Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Tue, 2 May 2023 09:36:27 +0200 Subject: [PATCH 03/24] Introduce `NewChain()` & `NewCondition()` funcs --- internal/filter/types.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/internal/filter/types.go b/internal/filter/types.go index 36bef5e1..9326cbe1 100644 --- a/internal/filter/types.go +++ b/internal/filter/types.go @@ -24,6 +24,15 @@ type Chain struct { rules []Filter } +func NewChain(op LogicalOp, rules ...Filter) (*Chain, error) { + switch op { + case None, All, Any: + return &Chain{rules: rules, op: op}, nil + default: + return nil, fmt.Errorf("invalid logical operator provided: %q", op) + } +} + // Eval evaluates the filter rule sets recursively based on their operator type. 
func (c *Chain) Eval(filterable Filterable) (bool, error) { switch c.op { @@ -130,6 +139,17 @@ type Condition struct { value string } +// NewCondition initiates a new Condition instance from the given data. +// Returns error if invalid CompOperator is provided. +func NewCondition(column string, op CompOperator, value string) (Filter, error) { + switch op { + case Equal, UnEqual, Like, UnLike, LessThan, LessThanEqual, GreaterThan, GreaterThanEqual: + return &Condition{op: op, column: column, value: value}, nil + default: + return nil, fmt.Errorf("invalid comparison operator provided: %q", op) + } +} + // Eval evaluates this Condition based on its operator. // Returns true when the filter evaluates to true false otherwise. func (c *Condition) Eval(filterable Filterable) (bool, error) { From d16679632101f8a0d0e8b0801bcefbdfd0d15d7d Mon Sep 17 00:00:00 2001 From: Julian Brost Date: Tue, 2 May 2023 09:47:58 +0200 Subject: [PATCH 04/24] `spew.Sdump()` filter & add some assertions in `FuzzParser()` --- internal/filter/parser_test.go | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index eeb4aa33..1b069cc5 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -1,6 +1,7 @@ package filter import ( + "github.com/davecgh/go-spew/spew" "github.com/stretchr/testify/assert" "strings" "testing" @@ -210,5 +211,30 @@ func FuzzParser(f *testing.F) { if strings.Count(expr, "(") != strings.Count(expr, ")") { assert.Error(t, err) } + + if err == nil { + dump := spew.Sdump(f) + + assertDumpContainsAny := func(substrs ...string) { + for _, substr := range substrs { + if strings.Contains(dump, substr) { + return + } + } + + assert.Failf(t, "Parsed expression dump did not contain any expected string", + "Expression: %q\nExpected: %#v\n\n%s", expr, substrs, dump) + } + + if strings.Contains(expr, "&") { + assertDumpContainsAny("All") + } + if strings.Contains(expr, "|") 
{ + assertDumpContainsAny("Any", "None") + } + if strings.Contains(expr, "!") { + assertDumpContainsAny("None", "UnEqual", "Unlike") + } + } }) } From 61d1b6df611e0fe32758c9b9ceffe03d96b95cb9 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 23 Oct 2023 12:22:16 +0200 Subject: [PATCH 05/24] Introduce `goyacc` filter parsing grammer rules --- internal/filter/parser.y | 180 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 internal/filter/parser.y diff --git a/internal/filter/parser.y b/internal/filter/parser.y new file mode 100644 index 00000000..9a0985bf --- /dev/null +++ b/internal/filter/parser.y @@ -0,0 +1,180 @@ +%{ + +package filter + +// reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). +// When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not +// of type filter.All, this will just create a new chain with the new op and append all the filter rules to it. +// Otherwise, it will pop the last pushed rule of that chain (first argument) and append it to the new *And chain. +// +// Example: `foo=bar|bar~foo&col!~val` +// The first argument `rule` is supposed to be a filter.Any Chain containing the first two conditions. +// We then call this function when the parser is processing the logical `&` op and the Unlike condition, +// and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. +func reduceFilter(rule Filter, op string, rules ...Filter) Filter { + chain, ok := rule.(*Chain); + if ok && chain.op == Any && LogicalOp(op) == All { + // Retrieve the last pushed condition and append it to the new "And" chain instead + andChain, _ := NewChain(All, chain.pop()) + andChain.add(rules...) 
+ + chain.add(andChain) + + return chain + } + + // If the given operator is the same as the already existsing chains operator (*chain), + // we don't need to create another chain of the same operator type. Avoids something + // like &Chain{op: All, &Chain{op: All, ...}} + if chain == nil || chain.op != LogicalOp(op) { + newChain, err := NewChain(LogicalOp(op), rule) + if err != nil { + // Just panic, filter.Parse will try to recover from this. + panic(err) + } + + chain = newChain + } + + chain.add(rules...) + + return chain +} +%} + +%union { + expr Filter + text string +} + +%type filter_rule +%type filter_chain +%type conditions_expr +%type maybe_negated_condition_expr +%type condition_expr +%type exists_expr + +%type comparison_op +%type optional_negation +%type identifier +%type logical_op + +%token T_EQUAL "=" +%token T_UNEQUAL "!" T_EQUAL +%token T_LIKE "~" +%token T_UNLIKE "!" T_LIKE +%token T_LESS_THAN "<" +%token T_GREATER_THAN ">" +%token T_LESS_THAN_OR_EQUAL "<" T_EQUAL +%token T_GREATER_THAN_OR_EQUAL ">" T_EQUAL +%token T_STRING +%token T_IDENTIFIER + +%type T_EQUAL +%type T_UNEQUAL +%type T_LIKE +%type T_UNLIKE +%type T_LESS_THAN +%type T_GREATER_THAN +%type T_LESS_THAN_OR_EQUAL +%type T_GREATER_THAN_OR_EQUAL + +%type "!" "&" "|" + +// This is just used for declaring explicit precedence and resolves shift/reduce conflicts +// in `filter_chain` and `conditions_expr` rules. +%nonassoc PREFER_SHIFTING_LOGICAL_OP + +%nonassoc T_EQUAL T_UNEQUAL T_LIKE T_UNLIKE +%nonassoc T_LESS_THAN T_LESS_THAN_OR_EQUAL T_GREATER_THAN T_GREATER_THAN_OR_EQUAL + +%left "!" 
"&" "|" +%left "(" +%right ")" + +%% + +filter_rule: filter_chain logical_op filter_chain + { + $$ = reduceFilter($1, $2, $3) + yylex.(*Lexer).rule = $$ + } + | filter_chain + ; + +filter_chain: conditions_expr logical_op maybe_negated_condition_expr + { + $$ = reduceFilter($1, $2, $3) + yylex.(*Lexer).rule = $$ + } + | conditions_expr %prec PREFER_SHIFTING_LOGICAL_OP + ; + +conditions_expr: maybe_negated_condition_expr logical_op maybe_negated_condition_expr + { + $$ = reduceFilter($1, $2, $3) + yylex.(*Lexer).rule = $$ + } + | maybe_negated_condition_expr %prec PREFER_SHIFTING_LOGICAL_OP + ; + +maybe_negated_condition_expr: optional_negation condition_expr + { + if $1 != "" { + // NewChain is only going to return an error if an invalid operator is specified, and since + // we explicitly provide the None operator, we don't expect an error to be returned. + $$, _ = NewChain(None, $2) + } else { + $$ = $2 + } + + yylex.(*Lexer).rule = $$ + } + +condition_expr: "(" filter_chain ")" + { + $$ = $2 + yylex.(*Lexer).rule = $$ + } + | identifier comparison_op identifier + { + cond, err := NewCondition($1, CompOperator($2), $3) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } + + $$ = cond + yylex.(*Lexer).rule = $$ + } + | exists_expr + ; + +exists_expr: identifier + { + $$ = NewExists($1) + yylex.(*Lexer).rule = $$ + } + ; + +identifier: T_IDENTIFIER + | T_STRING + ; + +optional_negation: /* empty */ { $$ = "" } + | "!" 
+ ; + +logical_op: "&" + | "|" + ; + +comparison_op: T_EQUAL + | T_UNEQUAL + | T_LIKE + | T_UNLIKE + | T_LESS_THAN + | T_LESS_THAN_OR_EQUAL + | T_GREATER_THAN + | T_GREATER_THAN_OR_EQUAL + ; From 890911197d2aa3e6ac36893537260c96f61f9840 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 23 Oct 2023 12:23:33 +0200 Subject: [PATCH 06/24] Introduce `Lexer` type and other parsing utils --- internal/filter/lexer.go | 144 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 internal/filter/lexer.go diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go new file mode 100644 index 00000000..80b59bb3 --- /dev/null +++ b/internal/filter/lexer.go @@ -0,0 +1,144 @@ +//go:generate goyacc -v parser.output -o parser.go parser.y + +package filter + +import ( + "errors" + "fmt" + "regexp" + "strings" + "text/scanner" +) + +// regex contains a compiled regexp and is used by the Lexer to match filter identifiers. +// Currently, it allows to match any character except these inside the curly braces. +var regex = regexp.MustCompile("[^!&|~<>=()]") + +// Parse wraps the auto generated yyParse function. +// It parses the given filter string and returns on success a Filter instance. +func Parse(expr string) (rule Filter, err error) { + lex := new(Lexer) + lex.IsIdentRune = isIdentRune + lex.Init(strings.NewReader(expr)) + + // scanner.Init sets the error function to nil, therefore, we have to register + // our error function after the scanner initialization. + lex.Scanner.Error = lex.ScanError + + // Enable parsers error verbose to get more context of the parsing failures + yyErrorVerbose = true + + defer func() { + // All the grammar rules panics when encountering any errors while reducing the filter rules, so try + // to recover from it and return an error instead. Since we're used a named return values, we can set + // the err value even in deferred function. 
See https://go.dev/blog/defer-panic-and-recover + if r := recover(); r != nil { + err = errors.New(fmt.Sprint(r)) + } + }() + + yyParse(lex) + + rule, err = lex.rule, lex.err + return +} + +// Lexer is used to tokenize the filter input into a set literals. +// This is just a wrapper around the Scanner type and implements the yyLexer interface used by the parser. +type Lexer struct { + scanner.Scanner + + rule Filter + err error +} + +func (l *Lexer) Lex(yyval *yySymType) int { + token := l.Scan() + lit := l.TokenText() + yyval.text = lit + if token == scanner.Ident { + return T_IDENTIFIER + } + + if token == scanner.String { + return T_STRING + } + + switch lit { + case "&": + return '&' + case "|": + return '|' + case "~": + return T_LIKE + case "=": + return T_EQUAL + case "(": + return '(' + case ")": + return ')' + case "!": + next := l.Peek() + switch next { + case '=', '~': + yyval.text = "!" + string(next) + // Since we manually picked the next char input, we also need to advance the internal scanner + // states by calling Scan. Otherwise, the same rune will be scanned multiple times. + l.Scan() + + if next == '~' { + return T_UNLIKE + } else { + return T_UNEQUAL + } + default: + return '!' + } + case "<": + if next := l.Peek(); next == '=' { + yyval.text = "<=" + // Since we manually picked the next char input, we also need to advance the internal scanner + // states by calling Scan. Otherwise, the same rune will be scanned multiple times. + l.Scan() + + return T_LESS_THAN_OR_EQUAL + } + + return T_LESS_THAN + case ">": + if next := l.Peek(); next == '=' { + yyval.text = ">=" + // Since we manually picked the next char input, we also need to advance the internal scanner + // states by calling Scan. Otherwise, the same rune will be scanned multiple times. + l.Scan() + + return T_GREATER_THAN_OR_EQUAL + } + + return T_GREATER_THAN + } + + // No more inputs to scan that we are interested in. 
+ // Scan returns EOF as well if there's no more token to stream, but we just want to be explicit. + return scanner.EOF +} + +// Error receives any syntax/semantic errors produced by the parser. +// +// The parser never returns an error when it fails to parse, but will forward the errors to our lexer with some +// additional context instead. This function then wraps the provided err and adds line, column number and offset +// to the error string. Error is equivalent to "yyerror" in the original yacc. +func (l *Lexer) Error(s string) { + l.err = errors.New(fmt.Sprintf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s)) +} + +// ScanError is used to capture all errors the Scanner encounters. +// +// It's a rare case that the scanner actually will fail to scan the input string, but in these cases it will just +// output to std.Err and we won't be able to notice this. Hence, this function is registered by the filter.Parse +// function after the Lexer initialisation. +func (l *Lexer) ScanError(_ *scanner.Scanner, msg string) { l.Error(msg) } + +// isIdentRune provides custom implementation of scanner.IsIdentRune. +// This function determines whether a given character is allowed to be part of an identifier. 
// isIdentRune provides a custom implementation of scanner.IsIdentRune.
// It reports whether ch is allowed to be part of a filter identifier, which
// is every rune except the filter meta characters "!&|~<>=()".
//
// Checking the rune directly against the meta character set avoids running
// (and allocating a one-rune string for) a regexp match on every input rune.
func isIdentRune(ch rune, _ int) bool { return !strings.ContainsRune("!&|~<>=()", ch) }
assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"(\", expecting T_STRING or T_IDENTIFIER") _, err = Parse("(((x=a)&y=b") - assert.EqualError(t, err, "invalid filter '(((x=a)&y=b', missing 2 closing ')' at pos 11", "Errors should be the same") + assert.EqualError(t, err, "1:12 (11): syntax error: unexpected $end, expecting \")\"") _, err = Parse("(x=a)&y=b)") - assert.EqualError(t, err, "invalid filter '(x=a)&y=b)', unexpected ) at pos 10", "Errors should be the same") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\"") _, err = Parse("!(&") - assert.EqualError(t, err, "invalid filter '!(&', unexpected & at pos 3", "Errors should be the same") - - _, err = Parse("!(!&") - assert.EqualError(t, err, "invalid filter '!(!&', unexpected & at pos 4: operator level 1", "Errors should be the same") - - _, err = Parse("!(|test") - assert.EqualError(t, err, "invalid filter '!(|test', unexpected | at pos 3", "Errors should be the same") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") _, err = Parse("foo&bar=(te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=(te(st)', unexpected ( at pos 9", "Errors should be the same") + assert.EqualError(t, err, "1:9 (8): syntax error: unexpected \"(\", expecting T_STRING or T_IDENTIFIER") _, err = Parse("foo&bar=te(st)") - assert.EqualError(t, err, "invalid filter 'foo&bar=te(st)', unexpected ( at pos 11", "Errors should be the same") + assert.EqualError(t, err, "1:11 (10): syntax error: unexpected \"(\"") _, err = Parse("foo&bar=test)") - assert.EqualError(t, err, "invalid filter 'foo&bar=test)', unexpected ) at pos 13", "Errors should be the same") + assert.EqualError(t, err, "1:13 (12): syntax error: unexpected \")\"") _, err = Parse("!()|&()&)") - assert.EqualError(t, err, "invalid filter '!()|&()&)', unexpected closing ')' at pos 9", "Errors should be the same") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected 
\")\", expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("=foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("foo>") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected $end, expecting T_STRING or T_IDENTIFIER") + + _, err = Parse("foo==") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected T_EQUAL, expecting T_STRING or T_IDENTIFIER") + + _, err = Parse("=>foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("&foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("&&foo") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("(&foo=bar)") + assert.EqualError(t, err, "1:2 (1): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("(foo=bar|)") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("((((((") + assert.EqualError(t, err, "1:7 (6): syntax error: unexpected $end, expecting T_STRING or T_IDENTIFIER or \"(\"") + + _, err = Parse("foo&bar&col=val!=val") + assert.EqualError(t, err, "1:17 (16): syntax error: unexpected T_UNEQUAL") + + _, err = Parse("col%7umn") + assert.EqualError(t, err, "invalid URL escape \"%7u\"") + + _, err = Parse("((0&((((((((((((((((((((((0=0)") + assert.EqualError(t, err, "1:31 (30): syntax error: unexpected $end, expecting \")\"") }) -} -func TestFilter(t *testing.T) { - t.Parallel() + t.Run("ParseAllKindOfSimpleFilters", func(t *testing.T) { + t.Parallel() - t.Run("ParserIdentifiesAllKindOfFilters", func(t *testing.T) { rule, err := Parse("foo=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) expected := 
&Condition{op: Equal, column: "foo", value: "bar"} @@ -120,42 +141,257 @@ func TestFilter(t *testing.T) { rule, err = Parse("foo") assert.Nil(t, err, "There should be no errors but got: %s", err) assert.Equal(t, &Exists{column: "foo"}, rule) + }) + + t.Run("ParseChain", func(t *testing.T) { + t.Parallel() + + var expected Filter + rule, err := Parse("!foo=bar") + expected = &Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Equal(t, expected, rule) - rule, err = Parse("!(foo=bar|bar=foo)&(foo=bar|bar=foo)") + rule, err = Parse("foo=bar&bar=foo") assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: ALL, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) - expectedChain := &Chain{op: All, rules: []Filter{ - &Chain{op: None, rules: []Filter{ + rule, err = Parse("foo=bar&bar=foo|col=val") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: ANY, rules: []Filter{ + &Chain{op: ALL, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, - &Chain{op: Any, rules: []Filter{ + &Condition{op: Equal, column: "col", value: "val"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar|bar=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: ANY, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("(foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Condition{op: Equal, column: "foo", value: "bar"} + assert.Equal(t, expected, rule) + + rule, err = Parse("(!foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = 
&Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!foo=bar)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: NONE, rules: []Filter{ + &Chain{op: NONE, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar|bar=foo)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: NONE, rules: []Filter{ + &Chain{op: ANY, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, }} - assert.Equal(t, expectedChain, rule) - }) + assert.Equal(t, expected, rule) - t.Run("ParserIdentifiesSingleCondition", func(t *testing.T) { - rule, err := Parse("foo=bar") + rule, err = Parse("((!foo=bar)&bar!=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: ALL, rules: []Filter{ + &Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) - expected := &Condition{op: Equal, column: "foo", value: "bar"} - assert.Equal(t, expected, rule, "Parser does not parse single condition correctly") + rule, err = Parse("!foo&!bar") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: ALL, rules: []Filter{ + &Chain{op: NONE, rules: []Filter{&Exists{column: "foo"}}}, + &Chain{op: NONE, rules: []Filter{&Exists{column: "bar"}}}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!foo|bar)") + assert.Nil(t, err, "There should be no 
errors but got: %s", err) + expected = &Chain{op: NONE, rules: []Filter{ + &Chain{op: ANY, rules: []Filter{ + &Chain{op: NONE, rules: []Filter{&Exists{column: "foo"}}}, + &Exists{column: "bar"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(!(foo|bar))") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: NONE, rules: []Filter{ + &Chain{op: NONE, rules: []Filter{ + &Chain{op: ANY, rules: []Filter{ + &Exists{column: "foo"}, + &Exists{column: "bar"}}, + }, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar!=foo") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: ALL, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("!(foo=bar|bar=foo)&(foo!=bar|bar!=foo)") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: ALL, rules: []Filter{ + &Chain{op: NONE, rules: []Filter{ + &Chain{op: ANY, rules: []Filter{ + &Condition{op: Equal, column: "foo", value: "bar"}, + &Condition{op: Equal, column: "bar", value: "foo"}, + }}, + }}, + &Chain{op: ANY, rules: []Filter{ + &Condition{op: UnEqual, column: "foo", value: "bar"}, + &Condition{op: UnEqual, column: "bar", value: "foo"}, + }}, + }} + assert.Equal(t, expected, rule) + + rule, err = Parse("foo=bar&bar!=foo&john>doe|doedoe|doedoe|doedoe|doebar") @@ -206,35 +448,14 @@ func FuzzParser(f *testing.F) { f.Add("col%29umn>val%29ue") f.Fuzz(func(t *testing.T, expr string) { - _, err := Parse(expr) + rule, err := Parse(expr) + t.Logf("Parsing filter expression %q - ERROR: %v", expr, err) if strings.Count(expr, "(") != strings.Count(expr, ")") { assert.Error(t, err) - } - - if err == nil { - dump := spew.Sdump(f) - - assertDumpContainsAny := func(substrs ...string) { - for _, substr := range substrs { - if strings.Contains(dump, 
substr) { - return - } - } - - assert.Failf(t, "Parsed expression dump did not contain any expected string", - "Expression: %q\nExpected: %#v\n\n%s", expr, substrs, dump) - } - - if strings.Contains(expr, "&") { - assertDumpContainsAny("All") - } - if strings.Contains(expr, "|") { - assertDumpContainsAny("Any", "None") - } - if strings.Contains(expr, "!") { - assertDumpContainsAny("None", "UnEqual", "Unlike") - } + assert.Nil(t, rule) + } else if err == nil && !strings.ContainsAny(expr, "!&|!>~<=") { + assert.IsType(t, new(Exists), rule) } }) } diff --git a/internal/object/object_test.go b/internal/object/object_test.go index ee6f250f..1989adce 100644 --- a/internal/object/object_test.go +++ b/internal/object/object_test.go @@ -29,8 +29,8 @@ func TestFilter(t *testing.T) { {"Host", false}, {"service", false}, {"!service", true}, - {"host=*.example.com&hostgroup/database-server", true}, - {"host=*.example.com&!hostgroup/database-server", false}, + {"host~*.example.com&hostgroup/database-server", true}, + {"host~*.example.com&!hostgroup/database-server", false}, {"!service&(country=DE&hostgroup/database-server)", true}, {"!service&!(country=AT|country=CH)", true}, {"hostgroup/Nuremberg %28Germany%29", true}, From 89320eb1412820aa18d4898a619b3981ea6fbc7f Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 20 Nov 2023 16:08:17 +0100 Subject: [PATCH 08/24] Add auto-generated goyacc filter `Parser` --- internal/filter/parser.go | 772 +++++++++++++++++++++++++------------- 1 file changed, 502 insertions(+), 270 deletions(-) diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 71dfe6b4..32e624bb 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -1,357 +1,589 @@ -package filter +// Code generated by goyacc -v parser.output -o parser.go parser.y. DO NOT EDIT. 
-import ( - "fmt" - "net/url" - "strings" -) +//line parser.y:2 -type Parser struct { - tag string - pos, length, openParenthesis int -} +package filter -// Parse parses an object filter expression. -func Parse(expression string) (Filter, error) { - parser := &Parser{tag: expression, length: len(expression)} - if parser.length == 0 { - return &Chain{op: All}, nil +import __yyfmt__ "fmt" + +// reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). +// When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not +// of type filter.All, this will just create a new chain with the new op and append all the filter rules to it. +// Otherwise, it will pop the last pushed rule of that chain (first argument) and append it to the new *And chain. +// +// Example: `foo=bar|bar~foo&col!~val` +// The first argument `rule` is supposed to be a filter.Any Chain containing the first two conditions. +// We then call this function when the parser is processing the logical `&` op and the Unlike condition, +// and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. +// +//line parser.y:3 +func reduceFilter(rule Filter, op string, rules ...Filter) Filter { + chain, ok := rule.(*Chain) + if ok && chain.op == Any && LogicalOp(op) == All { + // Retrieve the last pushed condition and append it to the new "And" chain instead + andChain, _ := NewChain(All, chain.pop()) + andChain.add(rules...) + + chain.add(andChain) + + return chain } - return parser.readFilter(0, "", nil) -} - -// readFilter reads the entire filter from the Parser.tag and derives a filter.Filter from it. -// Returns an error on parsing failure. 
-func (p *Parser) readFilter(nestingLevel int, operator string, rules []Filter) (Filter, error) { - negate := false - for p.pos < p.length { - condition, err := p.readCondition() + // If the given operator is the same as the already existsing chains operator (*chain), + // we don't need to create another chain of the same operator type. Avoids something + // like &Chain{op: All, &Chain{op: All, ...}} + if chain == nil || chain.op != LogicalOp(op) { + newChain, err := NewChain(LogicalOp(op), rule) if err != nil { - return nil, err + // Just panic, filter.Parse will try to recover from this. + panic(err) } - next := p.readChar() - if condition == nil { - if next == "!" { - negate = true - continue - } - - if operator == "" && len(rules) > 0 && (next == "&" || next == "|") { - operator = next - continue - } + chain = newChain + } - if next == "" { - break - } + chain.add(rules...) - if next == ")" { - p.openParenthesis-- + return chain +} - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +//line parser.y:45 +type yySymType struct { + yys int + expr Filter + text string +} - break - } +const T_EQUAL = 57346 +const T_UNEQUAL = 57347 +const T_LIKE = 57348 +const T_UNLIKE = 57349 +const T_LESS_THAN = 57350 +const T_GREATER_THAN = 57351 +const T_LESS_THAN_OR_EQUAL = 57352 +const T_GREATER_THAN_OR_EQUAL = 57353 +const T_STRING = 57354 +const T_IDENTIFIER = 57355 +const PREFER_SHIFTING_LOGICAL_OP = 57356 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "T_EQUAL", + "\"=\"", + "T_UNEQUAL", + "\"!\"", + "T_LIKE", + "\"~\"", + "T_UNLIKE", + "T_LESS_THAN", + "\"<\"", + "T_GREATER_THAN", + "\">\"", + "T_LESS_THAN_OR_EQUAL", + "T_GREATER_THAN_OR_EQUAL", + "T_STRING", + "T_IDENTIFIER", + "\"&\"", + "\"|\"", + "PREFER_SHIFTING_LOGICAL_OP", + "\"(\"", + "\")\"", +} - return nil, p.parseError(next, "") - } +var yyStatenames = 
[...]string{} - if next == "(" { - if p.nextChar() == "&" || p.nextChar() == "|" { - // When a logical operator follows directly after the opening parenthesis "(", - // this can't be a valid expression. E.g. "!(&" - next = p.readChar() +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 - return nil, p.parseError(next, "") - } +//line yacctab:1 +var yyExca = [...]int8{ + -1, 1, + 1, -1, + -2, 0, +} - p.openParenthesis++ +const yyPrivate = 57344 - op := "" - if negate { - op = "!" - } +const yyLast = 34 - rule, err := p.readFilter(nestingLevel+1, op, nil) - if err != nil { - return nil, err - } +var yyAct = [...]int8{ + 17, 16, 31, 14, 23, 13, 24, 5, 25, 22, + 26, 27, 4, 29, 2, 28, 30, 8, 9, 17, + 16, 6, 18, 19, 20, 7, 32, 15, 21, 10, + 11, 12, 3, 1, +} - rules = append(rules, rule) - negate = false - continue - } +var yyPact = [...]int16{ + 14, -1000, -2, -2, -2, -17, -1000, 14, -1000, -1000, + 14, 14, -1000, 14, 0, -1000, -1000, -1000, -1000, -1000, + -1000, -21, 2, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, +} - if next == operator { - continue - } +var yyPgo = [...]int8{ + 0, 33, 14, 32, 12, 31, 27, 9, 7, 3, + 25, +} - // When the current operator is a "!", the next one can't be a logical operator. - if operator != "!" && (next == "&" || next == "|") { - if operator == "&" { - if len(rules) > 1 { - rules = []Filter{&Chain{op: All, rules: rules}} - } +var yyR1 = [...]int8{ + 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, + 5, 6, 9, 9, 8, 8, 10, 10, 7, 7, + 7, 7, 7, 7, 7, 7, +} - operator = next - } else if operator == "|" || (operator == "!" 
&& next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +var yyR2 = [...]int8{ + 0, 3, 1, 3, 1, 3, 1, 2, 3, 3, + 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +var yyChk = [...]int16{ + -1000, -1, -2, -3, -4, -8, 7, -10, 19, 20, + -10, -10, -5, 22, -9, -6, 18, 17, -2, -4, + -4, -2, -7, 4, 6, 8, 10, 11, 15, 13, + 16, 23, -9, +} - rules = append(rules, rule) - } +var yyDef = [...]int8{ + 14, -2, 2, 4, 6, 0, 15, 14, 16, 17, + 14, 14, 7, 14, 11, 10, 12, 13, 1, 3, + 5, 0, 0, 18, 19, 20, 21, 22, 23, 24, + 25, 8, 9, +} - continue - } +var yyTok1 = [...]int8{ + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 7, 3, 3, 3, 3, 19, 3, + 22, 23, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 12, 5, 14, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 20, 3, 9, +} - return nil, p.parseError(next, fmt.Sprintf("operator level %d", nestingLevel)) - } else { - if negate { - negate = false - rules = append(rules, &Chain{op: None, rules: []Filter{condition}}) - } else { - rules = append(rules, condition) - } +var yyTok2 = [...]int8{ + 2, 3, 4, 6, 8, 10, 11, 13, 15, 16, + 17, 18, 21, +} - if next == "" { - break - } +var yyTok3 = [...]int8{ + 0, +} - if next == ")" { - p.openParenthesis-- +var yyErrorMessages = [...]struct { + state int + token int + msg string +}{} - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +//line yaccpar:1 - break - } +/* parser for yacc output */ - return nil, p.parseError(next, "") - } +var ( + yyDebug = 0 
+ yyErrorVerbose = false +) - if next == operator { - continue - } +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} - if next == "&" || next == "|" { - if operator == "" || operator == "&" { - if operator == "&" && len(rules) > 1 { - all := &Chain{op: All, rules: rules} - rules = []Filter{all} - } +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} - operator = next - } else if operator == "" || (operator == "!" && next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +func (p *yyParserImpl) Lookahead() int { + return p.char +} - rules = append(rules, rule) - } +func yyNewParser() yyParser { + return &yyParserImpl{} +} - continue - } +const yyFlag = -1000 - return nil, p.parseError(next, "") +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] } } + return __yyfmt__.Sprintf("tok-%v", c) +} - if nestingLevel == 0 && p.pos < p.length { - return nil, p.parseError(operator, "Did not read full filter") +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } } + return __yyfmt__.Sprintf("state-%v", s) +} - if nestingLevel == 0 && p.openParenthesis > 0 { - return nil, fmt.Errorf("invalid filter '%s', missing %d closing ')' at pos %d", p.tag, p.openParenthesis, p.pos) - } +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 - if nestingLevel == 0 && p.openParenthesis < 0 { - return nil, fmt.Errorf("invalid filter '%s', unexpected closing ')' at pos %d", p.tag, p.pos) + if !yyErrorVerbose { + return "syntax error" } - var chain Filter - switch operator { - case "&": - chain 
= &Chain{op: All, rules: rules} - case "|": - chain = &Chain{op: Any, rules: rules} - case "!": - chain = &Chain{op: None, rules: rules} - case "": - if nestingLevel == 0 && rules != nil { - // There is only one filter tag, no chain - return rules[0], nil + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg } - - chain = &Chain{op: All, rules: rules} - default: - return nil, p.parseError(operator, "") } - return chain, nil -} + res := "syntax error: unexpected " + yyTokname(lookAhead) -// readCondition reads the next filter.Filter. -// returns nil if there is no char to read and an error on parsing failure. -func (p *Parser) readCondition() (Filter, error) { - column, err := p.readColumn() - if err != nil || column == "" { - return nil, err - } + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) - operator := "" - if strings.Contains("=>= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } } - if operator == "" { - return NewExists(column), nil - } + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || int(yyExca[i+1]) != state { + i += 2 + } - if strings.Contains(">= 0; i += 2 { + tok := int(yyExca[i]) + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) } - } - value, err := p.readValue() - if err != nil { - return nil, err + // If the default action is to accept or reduce, give up. + if yyExca[i+1] != 0 { + return res + } } - condition, err := p.createCondition(column, operator, value) - if err != nil { - return nil, err + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) } - - return condition, nil + return res } -// createCondition creates a filter.Filter based on the given operator. 
-// returns nil when invalid operator is given. -func (p *Parser) createCondition(column string, operator string, value string) (Filter, error) { - column = strings.TrimSpace(column) - switch operator { - case "=": - if strings.Contains(value, "*") { - return &Condition{op: Like, column: column, value: value}, nil +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = int(yyTok1[0]) + goto out + } + if char < len(yyTok1) { + token = int(yyTok1[char]) + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = int(yyTok2[char-yyPrivate]) + goto out } - - return &Condition{op: Equal, column: column, value: value}, nil - case "!=": - if strings.Contains(value, "*") { - return &Condition{op: UnLike, column: column, value: value}, nil + } + for i := 0; i < len(yyTok3); i += 2 { + token = int(yyTok3[i+0]) + if token == char { + token = int(yyTok3[i+1]) + goto out } + } - return &Condition{op: UnEqual, column: column, value: value}, nil - case ">": - return &Condition{op: GreaterThan, column: column, value: value}, nil - case ">=": - return &Condition{op: GreaterThanEqual, column: column, value: value}, nil - case "<": - return &Condition{op: LessThan, column: column, value: value}, nil - case "<=": - return &Condition{op: LessThanEqual, column: column, value: value}, nil - default: - return nil, fmt.Errorf("invalid operator %s provided", operator) +out: + if token == 0 { + token = int(yyTok2[1]) /* unknown char */ + } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) } + return char, token } -// readColumn reads a column name from the Parser.tag. -// returns empty string if there is no char to read. 
-func (p *Parser) readColumn() (string, error) { - return url.QueryUnescape(p.readUntil("=()&|><") - if value == "" { - return "", nil +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. + yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) } - return url.QueryUnescape(value) -} + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate -// readUntil reads chars until any of the given characters -// May return empty string if there is no char to read -func (p *Parser) readUntil(chars string) string { - var buffer string - for char := p.readChar(); char != ""; char = p.readChar() { - if strings.Contains(chars, char) { - p.pos-- - break +yynewstate: + yyn = int(yyPact[yystate]) + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = int(yyAct[yyn]) + if int(yyChk[yyn]) == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- } - - buffer += char + goto yystack } - return buffer -} - -// readChar peeks the next char of the Parser.tag and increments the Parser.pos by one -// returns empty if there is no char to read -func (p *Parser) readChar() 
string { - if p.pos < p.length { - pos := p.pos - p.pos++ +yydefault: + /* default state action */ + yyn = int(yyDef[yystate]) + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } - return string(p.tag[pos]) + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = int(yyExca[xi+0]) + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = int(yyExca[xi+1]) + if yyn < 0 { + goto ret0 + } } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = int(yyAct[yyn]) /* simulate a shift of "error" */ + if int(yyChk[yystate]) == yyErrCode { + goto yystack + } + } - return "" -} + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... 
abort */ + goto ret1 -// nextChar peeks the next char from the parser tag -// returns empty string if there is no char to read -func (p *Parser) nextChar() string { - if p.pos < p.length { - return string(p.tag[p.pos]) + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } } - return "" -} + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } -// parseError returns a formatted and detailed parser error. -// If you don't provide the char that causes the parser to fail, the char at `p.pos` is automatically used. -// By specifying the `msg` arg you can provide additional err hints that can help debugging. -func (p *Parser) parseError(invalidChar string, msg string) error { - if invalidChar == "" { - pos := p.pos - if p.pos == p.length { - pos-- + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= int(yyR2[yyn]) + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. 
+ if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = int(yyR1[yyn]) + yyg := int(yyPgo[yyn]) + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = int(yyAct[yyg]) + } else { + yystate = int(yyAct[yyj]) + if int(yyChk[yystate]) != -yyn { + yystate = int(yyAct[yyg]) } - - invalidChar = string(p.tag[pos]) } + // dummy call; replaced with literal code + switch yynt { + + case 1: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:98 + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 3: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:106 + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 5: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:114 + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 7: + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.y:122 + { + if yyDollar[1].text != "" { + // NewChain is only going to return an error if an invalid operator is specified, and since + // we explicitly provide the None operator, we don't expect an error to be returned. + yyVAL.expr, _ = NewChain(None, yyDollar[2].expr) + } else { + yyVAL.expr = yyDollar[2].expr + } - if msg != "" { - msg = ": " + msg - } + yylex.(*Lexer).rule = yyVAL.expr + } + case 8: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:135 + { + yyVAL.expr = yyDollar[2].expr + yylex.(*Lexer).rule = yyVAL.expr + } + case 9: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:140 + { + cond, err := NewCondition(yyDollar[1].text, CompOperator(yyDollar[2].text), yyDollar[3].text) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. 
+ panic(err) + } - return fmt.Errorf("invalid filter '%s', unexpected %s at pos %d%s", p.tag, invalidChar, p.pos, msg) + yyVAL.expr = cond + yylex.(*Lexer).rule = yyVAL.expr + } + case 11: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.y:154 + { + yyVAL.expr = NewExists(yyDollar[1].text) + yylex.(*Lexer).rule = yyVAL.expr + } + case 14: + yyDollar = yyS[yypt-0 : yypt+1] +//line parser.y:164 + { + yyVAL.text = "" + } + } + goto yystack /* stack new state and value */ } From 5107934c358c98ba2daed2cec3f7e16a59031171 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 27 Oct 2023 10:44:37 +0200 Subject: [PATCH 09/24] Set `yylex.rule` only once --- internal/filter/parser.y | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/internal/filter/parser.y b/internal/filter/parser.y index 9a0985bf..998099cb 100644 --- a/internal/filter/parser.y +++ b/internal/filter/parser.y @@ -100,12 +100,14 @@ filter_rule: filter_chain logical_op filter_chain yylex.(*Lexer).rule = $$ } | filter_chain + { + yylex.(*Lexer).rule = $$ + } ; filter_chain: conditions_expr logical_op maybe_negated_condition_expr { $$ = reduceFilter($1, $2, $3) - yylex.(*Lexer).rule = $$ } | conditions_expr %prec PREFER_SHIFTING_LOGICAL_OP ; @@ -113,7 +115,6 @@ filter_chain: conditions_expr logical_op maybe_negated_condition_expr conditions_expr: maybe_negated_condition_expr logical_op maybe_negated_condition_expr { $$ = reduceFilter($1, $2, $3) - yylex.(*Lexer).rule = $$ } | maybe_negated_condition_expr %prec PREFER_SHIFTING_LOGICAL_OP ; @@ -127,14 +128,11 @@ maybe_negated_condition_expr: optional_negation condition_expr } else { $$ = $2 } - - yylex.(*Lexer).rule = $$ } condition_expr: "(" filter_chain ")" { $$ = $2 - yylex.(*Lexer).rule = $$ } | identifier comparison_op identifier { @@ -145,7 +143,6 @@ condition_expr: "(" filter_chain ")" } $$ = cond - yylex.(*Lexer).rule = $$ } | exists_expr ; @@ -153,7 +150,6 @@ condition_expr: "(" filter_chain ")" exists_expr: identifier { $$ = 
NewExists($1) - yylex.(*Lexer).rule = $$ } ; From d6768fe877aaf26964c27d416ddd9dec13d5d308 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 27 Oct 2023 17:33:08 +0200 Subject: [PATCH 10/24] Reset `Lexer#rule` on error & use `init()` for initializing global confg --- internal/filter/lexer.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go index 80b59bb3..82ff8778 100644 --- a/internal/filter/lexer.go +++ b/internal/filter/lexer.go @@ -14,6 +14,12 @@ import ( // Currently, it allows to match any character except these inside the curly braces. var regex = regexp.MustCompile("[^!&|~<>=()]") +// init just sets the global yyErrorVerbose variable to true. +func init() { + // Enable parsers error verbose to get more context of the parsing failures + yyErrorVerbose = true +} + // Parse wraps the auto generated yyParse function. // It parses the given filter string and returns on success a Filter instance. func Parse(expr string) (rule Filter, err error) { @@ -25,9 +31,6 @@ func Parse(expr string) (rule Filter, err error) { // our error function after the scanner initialization. lex.Scanner.Error = lex.ScanError - // Enable parsers error verbose to get more context of the parsing failures - yyErrorVerbose = true - defer func() { // All the grammar rules panics when encountering any errors while reducing the filter rules, so try // to recover from it and return an error instead. Since we're used a named return values, we can set @@ -35,6 +38,11 @@ func Parse(expr string) (rule Filter, err error) { if r := recover(); r != nil { err = errors.New(fmt.Sprint(r)) } + + if err != nil { + // The lexer may contain some incomplete filter rules constructed before the parser panics, so reset it. + rule = nil + } }() yyParse(lex) @@ -129,7 +137,10 @@ func (l *Lexer) Lex(yyval *yySymType) int { // additional context instead. 
This function then wraps the provided err and adds line, column number and offset // to the error string. Error is equivalent to "yyerror" in the original yacc. func (l *Lexer) Error(s string) { - l.err = errors.New(fmt.Sprintf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s)) + l.err = fmt.Errorf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s) + + // Always reset the current filter rule when encountering an error. + l.rule = nil } // ScanError is used to capture all errors the Scanner encounters. From ab4c51d80adf776e123e7c9899bb059dedeb9484 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 27 Oct 2023 17:34:06 +0200 Subject: [PATCH 11/24] Name regex variable after its usage --- internal/filter/lexer.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go index 82ff8778..707bd75a 100644 --- a/internal/filter/lexer.go +++ b/internal/filter/lexer.go @@ -10,9 +10,9 @@ import ( "text/scanner" ) -// regex contains a compiled regexp and is used by the Lexer to match filter identifiers. -// Currently, it allows to match any character except these inside the curly braces. -var regex = regexp.MustCompile("[^!&|~<>=()]") +// identifiersMatcher contains a compiled regexp and is used by the Lexer to match filter identifiers. +// Currently, it allows to match any character except a LogicalOp and CompOperator. +var identifiersMatcher = regexp.MustCompile("[^!&|~<>=()]") // init just sets the global yyErrorVerbose variable to true. func init() { @@ -152,4 +152,4 @@ func (l *Lexer) ScanError(_ *scanner.Scanner, msg string) { l.Error(msg) } // isIdentRune provides custom implementation of scanner.IsIdentRune. // This function determines whether a given character is allowed to be part of an identifier. 
-func isIdentRune(ch rune, _ int) bool { return regex.MatchString(string(ch)) } +func isIdentRune(ch rune, _ int) bool { return identifiersMatcher.MatchString(string(ch)) } From 8f74a9a04691fe6d021db17a96a947dc72606b43 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 27 Oct 2023 17:37:53 +0200 Subject: [PATCH 12/24] Fix accessing to non-existent logical operators --- internal/filter/parser.go | 25 +++++---- internal/filter/parser.y | 1 + internal/filter/parser_test.go | 93 +++++++++++++++++----------------- 3 files changed, 60 insertions(+), 59 deletions(-) diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 32e624bb..08682d60 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -523,23 +523,27 @@ yydefault: yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) yylex.(*Lexer).rule = yyVAL.expr } + case 2: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.y:103 + { + yylex.(*Lexer).rule = yyVAL.expr + } case 3: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:106 +//line parser.y:109 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) - yylex.(*Lexer).rule = yyVAL.expr } case 5: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:114 +//line parser.y:116 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) - yylex.(*Lexer).rule = yyVAL.expr } case 7: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.y:122 +//line parser.y:123 { if yyDollar[1].text != "" { // NewChain is only going to return an error if an invalid operator is specified, and since @@ -548,19 +552,16 @@ yydefault: } else { yyVAL.expr = yyDollar[2].expr } - - yylex.(*Lexer).rule = yyVAL.expr } case 8: yyDollar = yyS[yypt-3 : yypt+1] //line parser.y:135 { yyVAL.expr = yyDollar[2].expr - yylex.(*Lexer).rule = yyVAL.expr } case 9: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:140 +//line parser.y:139 { cond, err := NewCondition(yyDollar[1].text, CompOperator(yyDollar[2].text), 
yyDollar[3].text) if err != nil { @@ -569,18 +570,16 @@ yydefault: } yyVAL.expr = cond - yylex.(*Lexer).rule = yyVAL.expr } case 11: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:154 +//line parser.y:152 { yyVAL.expr = NewExists(yyDollar[1].text) - yylex.(*Lexer).rule = yyVAL.expr } case 14: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.y:164 +//line parser.y:161 { yyVAL.text = "" } diff --git a/internal/filter/parser.y b/internal/filter/parser.y index 998099cb..21d78c4b 100644 --- a/internal/filter/parser.y +++ b/internal/filter/parser.y @@ -129,6 +129,7 @@ maybe_negated_condition_expr: optional_negation condition_expr $$ = $2 } } + ; condition_expr: "(" filter_chain ")" { diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index a2fdd94f..e02d91f3 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -148,12 +148,13 @@ func TestParser(t *testing.T) { var expected Filter rule, err := Parse("!foo=bar") - expected = &Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + assert.Nil(t, err, "There should be no errors but got: %s", err) assert.Equal(t, expected, rule) rule, err = Parse("foo=bar&bar=foo") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: ALL, rules: []Filter{ + expected = &Chain{op: All, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }} @@ -161,8 +162,8 @@ func TestParser(t *testing.T) { rule, err = Parse("foo=bar&bar=foo|col=val") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: ANY, rules: []Filter{ - &Chain{op: ALL, rules: []Filter{ + expected = &Chain{op: Any, rules: []Filter{ + &Chain{op: All, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: 
"foo"}, }}, @@ -172,7 +173,7 @@ func TestParser(t *testing.T) { rule, err = Parse("foo=bar|bar=foo") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: ANY, rules: []Filter{ + expected = &Chain{op: Any, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }} @@ -185,18 +186,18 @@ func TestParser(t *testing.T) { rule, err = Parse("(!foo=bar)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} assert.Equal(t, expected, rule) rule, err = Parse("!(foo=bar)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} assert.Equal(t, expected, rule) rule, err = Parse("!(!foo=bar)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: NONE, rules: []Filter{ - &Chain{op: NONE, rules: []Filter{ + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: None, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, }}, }} @@ -204,8 +205,8 @@ func TestParser(t *testing.T) { rule, err = Parse("!(foo=bar|bar=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: NONE, rules: []Filter{ - &Chain{op: ANY, rules: []Filter{ + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, @@ -214,25 +215,25 @@ func TestParser(t *testing.T) { rule, err = Parse("((!foo=bar)&bar!=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = 
&Chain{op: ALL, rules: []Filter{ - &Chain{op: NONE, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}}, + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}}, &Condition{op: UnEqual, column: "bar", value: "foo"}, }} assert.Equal(t, expected, rule) rule, err = Parse("!foo&!bar") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: ALL, rules: []Filter{ - &Chain{op: NONE, rules: []Filter{&Exists{column: "foo"}}}, - &Chain{op: NONE, rules: []Filter{&Exists{column: "bar"}}}, + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Exists{column: "foo"}}}, + &Chain{op: None, rules: []Filter{&Exists{column: "bar"}}}, }} assert.Equal(t, expected, rule) rule, err = Parse("!(!foo|bar)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: NONE, rules: []Filter{ - &Chain{op: ANY, rules: []Filter{ - &Chain{op: NONE, rules: []Filter{&Exists{column: "foo"}}}, + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ + &Chain{op: None, rules: []Filter{&Exists{column: "foo"}}}, &Exists{column: "bar"}, }}, }} @@ -240,9 +241,9 @@ func TestParser(t *testing.T) { rule, err = Parse("!(!(foo|bar))") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: NONE, rules: []Filter{ - &Chain{op: NONE, rules: []Filter{ - &Chain{op: ANY, rules: []Filter{ + expected = &Chain{op: None, rules: []Filter{ + &Chain{op: None, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ &Exists{column: "foo"}, &Exists{column: "bar"}}, }, @@ -252,7 +253,7 @@ func TestParser(t *testing.T) { rule, err = Parse("foo=bar&bar!=foo") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: ALL, rules: []Filter{ + expected = &Chain{op: All, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: UnEqual, column: 
"bar", value: "foo"}, }} @@ -260,14 +261,14 @@ func TestParser(t *testing.T) { rule, err = Parse("!(foo=bar|bar=foo)&(foo!=bar|bar!=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: ALL, rules: []Filter{ - &Chain{op: NONE, rules: []Filter{ - &Chain{op: ANY, rules: []Filter{ + expected = &Chain{op: All, rules: []Filter{ + &Chain{op: None, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, }}, - &Chain{op: ANY, rules: []Filter{ + &Chain{op: Any, rules: []Filter{ &Condition{op: UnEqual, column: "foo", value: "bar"}, &Condition{op: UnEqual, column: "bar", value: "foo"}, }}, @@ -277,14 +278,14 @@ func TestParser(t *testing.T) { rule, err = Parse("foo=bar&bar!=foo&john>doe|doedoe|doedoe|doedoe|doe Date: Mon, 6 Nov 2023 09:31:00 +0100 Subject: [PATCH 13/24] Set the scanner mode to `ScanIdents` By default, the scanner mode is set to `scanner.GoTokens` which contains way more flags than are actually needed. This commit now sets this to only recognize identifiers and `scanner#Scan()` will return all other unrecognized tokens as they are. --- internal/filter/lexer.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go index 707bd75a..79128fbc 100644 --- a/internal/filter/lexer.go +++ b/internal/filter/lexer.go @@ -27,13 +27,19 @@ func Parse(expr string) (rule Filter, err error) { lex.IsIdentRune = isIdentRune lex.Init(strings.NewReader(expr)) + // Set the scanner mode to recognize only identifiers. This way all unrecognized tokens will be returned + // just as they are, and our Lexer#Lex() method will then recognize whatever valid input is required. 
+ // Note: This is in fact not necessary, as our custom function `isIdentRune` accepts any token that matches the + // regex pattern `identifiersMatcher`, so the scanner would never match all the scanner.GoTokens except ScanIdents. + lex.Mode = scanner.ScanIdents + // scanner.Init sets the error function to nil, therefore, we have to register // our error function after the scanner initialization. lex.Scanner.Error = lex.ScanError defer func() { // All the grammar rules panics when encountering any errors while reducing the filter rules, so try - // to recover from it and return an error instead. Since we're used a named return values, we can set + // to recover from it and return an error instead. Since we're using a named return values, we can set // the err value even in deferred function. See https://go.dev/blog/defer-panic-and-recover if r := recover(); r != nil { err = errors.New(fmt.Sprint(r)) @@ -51,7 +57,7 @@ func Parse(expr string) (rule Filter, err error) { return } -// Lexer is used to tokenize the filter input into a set literals. +// Lexer is used to tokenize the filter input into a set of literals. // This is just a wrapper around the Scanner type and implements the yyLexer interface used by the parser. 
type Lexer struct { scanner.Scanner From 94f92c6fe709039b8f5ece87c6fabe27f46df989 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 6 Nov 2023 09:32:00 +0100 Subject: [PATCH 14/24] Drop superfluous `T_STRING` token --- internal/filter/lexer.go | 4 -- internal/filter/parser.go | 82 +++++++++++++++++----------------- internal/filter/parser.y | 6 +-- internal/filter/parser_test.go | 28 ++++++------ 4 files changed, 58 insertions(+), 62 deletions(-) diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go index 79128fbc..a3b0f1cd 100644 --- a/internal/filter/lexer.go +++ b/internal/filter/lexer.go @@ -74,10 +74,6 @@ func (l *Lexer) Lex(yyval *yySymType) int { return T_IDENTIFIER } - if token == scanner.String { - return T_STRING - } - switch lit { case "&": return '&' diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 08682d60..8de28aa1 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -62,9 +62,8 @@ const T_LESS_THAN = 57350 const T_GREATER_THAN = 57351 const T_LESS_THAN_OR_EQUAL = 57352 const T_GREATER_THAN_OR_EQUAL = 57353 -const T_STRING = 57354 -const T_IDENTIFIER = 57355 -const PREFER_SHIFTING_LOGICAL_OP = 57356 +const T_IDENTIFIER = 57354 +const PREFER_SHIFTING_LOGICAL_OP = 57355 var yyToknames = [...]string{ "$end", @@ -83,10 +82,9 @@ var yyToknames = [...]string{ "\">\"", "T_LESS_THAN_OR_EQUAL", "T_GREATER_THAN_OR_EQUAL", - "T_STRING", "T_IDENTIFIER", - "\"&\"", "\"|\"", + "\"&\"", "PREFER_SHIFTING_LOGICAL_OP", "\"(\"", "\")\"", @@ -98,6 +96,8 @@ const yyEofCode = 1 const yyErrCode = 2 const yyInitialStackSize = 16 +//line parser.y:177 + //line yacctab:1 var yyExca = [...]int8{ -1, 1, @@ -107,51 +107,51 @@ var yyExca = [...]int8{ const yyPrivate = 57344 -const yyLast = 34 +const yyLast = 32 var yyAct = [...]int8{ - 17, 16, 31, 14, 23, 13, 24, 5, 25, 22, - 26, 27, 4, 29, 2, 28, 30, 8, 9, 17, - 16, 6, 18, 19, 20, 7, 32, 15, 21, 10, - 11, 12, 3, 1, + 22, 30, 23, 14, 24, 16, 25, 26, 4, 28, + 16, 27, 29, 
2, 13, 9, 8, 6, 7, 18, + 19, 17, 10, 11, 5, 31, 21, 20, 15, 12, + 3, 1, } var yyPact = [...]int16{ - 14, -1000, -2, -2, -2, -17, -1000, 14, -1000, -1000, - 14, 14, -1000, 14, 0, -1000, -1000, -1000, -1000, -1000, - -1000, -21, 2, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, + 10, -1000, -3, -3, -3, -7, -1000, 10, -1000, -1000, + 10, 10, -1000, 10, -4, -1000, -1000, -1000, -1000, -1000, + -21, -12, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, } var yyPgo = [...]int8{ - 0, 33, 14, 32, 12, 31, 27, 9, 7, 3, - 25, + 0, 31, 13, 30, 8, 29, 28, 26, 24, 3, + 18, } var yyR1 = [...]int8{ 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, - 5, 6, 9, 9, 8, 8, 10, 10, 7, 7, - 7, 7, 7, 7, 7, 7, + 5, 6, 9, 8, 8, 10, 10, 7, 7, 7, + 7, 7, 7, 7, 7, } var yyR2 = [...]int8{ 0, 3, 1, 3, 1, 3, 1, 2, 3, 3, - 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, } var yyChk = [...]int16{ - -1000, -1, -2, -3, -4, -8, 7, -10, 19, 20, - -10, -10, -5, 22, -9, -6, 18, 17, -2, -4, - -4, -2, -7, 4, 6, 8, 10, 11, 15, 13, - 16, 23, -9, + -1000, -1, -2, -3, -4, -8, 7, -10, 19, 18, + -10, -10, -5, 21, -9, -6, 17, -2, -4, -4, + -2, -7, 4, 6, 8, 10, 11, 15, 13, 16, + 22, -9, } var yyDef = [...]int8{ - 14, -2, 2, 4, 6, 0, 15, 14, 16, 17, - 14, 14, 7, 14, 11, 10, 12, 13, 1, 3, - 5, 0, 0, 18, 19, 20, 21, 22, 23, 24, - 25, 8, 9, + 13, -2, 2, 4, 6, 0, 14, 13, 15, 16, + 13, 13, 7, 13, 11, 10, 12, 1, 3, 5, + 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, + 8, 9, } var yyTok1 = [...]int8{ @@ -159,7 +159,7 @@ var yyTok1 = [...]int8{ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, 3, 3, 19, 3, - 22, 23, 3, 3, 3, 3, 3, 3, 3, 3, + 21, 22, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 12, 5, 14, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -167,12 +167,12 @@ var yyTok1 = [...]int8{ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 20, 3, 9, + 3, 3, 
3, 3, 18, 3, 9, } var yyTok2 = [...]int8{ 2, 3, 4, 6, 8, 10, 11, 13, 15, 16, - 17, 18, 21, + 17, 20, } var yyTok3 = [...]int8{ @@ -518,32 +518,32 @@ yydefault: case 1: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:98 +//line parser.y:97 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) yylex.(*Lexer).rule = yyVAL.expr } case 2: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:103 +//line parser.y:102 { yylex.(*Lexer).rule = yyVAL.expr } case 3: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:109 +//line parser.y:108 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) } case 5: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:116 +//line parser.y:115 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) } case 7: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.y:123 +//line parser.y:122 { if yyDollar[1].text != "" { // NewChain is only going to return an error if an invalid operator is specified, and since @@ -555,13 +555,13 @@ yydefault: } case 8: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:135 +//line parser.y:134 { yyVAL.expr = yyDollar[2].expr } case 9: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:139 +//line parser.y:138 { cond, err := NewCondition(yyDollar[1].text, CompOperator(yyDollar[2].text), yyDollar[3].text) if err != nil { @@ -573,13 +573,13 @@ yydefault: } case 11: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:152 +//line parser.y:151 { yyVAL.expr = NewExists(yyDollar[1].text) } - case 14: + case 13: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.y:161 +//line parser.y:159 { yyVAL.text = "" } diff --git a/internal/filter/parser.y b/internal/filter/parser.y index 21d78c4b..8f8c471c 100644 --- a/internal/filter/parser.y +++ b/internal/filter/parser.y @@ -67,7 +67,6 @@ func reduceFilter(rule Filter, op string, rules ...Filter) Filter { %token T_GREATER_THAN ">" %token T_LESS_THAN_OR_EQUAL "<" T_EQUAL %token T_GREATER_THAN_OR_EQUAL ">" T_EQUAL -%token 
T_STRING %token T_IDENTIFIER %type T_EQUAL @@ -79,7 +78,7 @@ func reduceFilter(rule Filter, op string, rules ...Filter) Filter { %type T_LESS_THAN_OR_EQUAL %type T_GREATER_THAN_OR_EQUAL -%type "!" "&" "|" +%type "|" "&" "!" // This is just used for declaring explicit precedence and resolves shift/reduce conflicts // in `filter_chain` and `conditions_expr` rules. @@ -155,7 +154,6 @@ exists_expr: identifier ; identifier: T_IDENTIFIER - | T_STRING ; optional_negation: /* empty */ { $$ = "" } @@ -175,3 +173,5 @@ comparison_op: T_EQUAL | T_GREATER_THAN | T_GREATER_THAN_OR_EQUAL ; + +%% diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index e02d91f3..1fa9e4a7 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -16,10 +16,10 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:10 (9): syntax error: unexpected T_IDENTIFIER") _, err = Parse("(a=b|c=d|)e=f") - assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting T_IDENTIFIER or \"(\"") _, err = Parse("col=(") - assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"(\", expecting T_STRING or T_IDENTIFIER") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"(\", expecting T_IDENTIFIER") _, err = Parse("(((x=a)&y=b") assert.EqualError(t, err, "1:12 (11): syntax error: unexpected $end, expecting \")\"") @@ -28,10 +28,10 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\"") _, err = Parse("!(&") - assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \"&\", expecting T_IDENTIFIER or \"(\"") _, err = Parse("foo&bar=(te(st)") - assert.EqualError(t, err, "1:9 (8): syntax error: unexpected \"(\", expecting T_STRING or 
T_IDENTIFIER") + assert.EqualError(t, err, "1:9 (8): syntax error: unexpected \"(\", expecting T_IDENTIFIER") _, err = Parse("foo&bar=te(st)") assert.EqualError(t, err, "1:11 (10): syntax error: unexpected \"(\"") @@ -40,34 +40,34 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:13 (12): syntax error: unexpected \")\"") _, err = Parse("!()|&()&)") - assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \")\", expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \")\", expecting T_IDENTIFIER or \"(\"") _, err = Parse("=foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_IDENTIFIER or \"(\"") _, err = Parse("foo>") - assert.EqualError(t, err, "1:5 (4): syntax error: unexpected $end, expecting T_STRING or T_IDENTIFIER") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected $end, expecting T_IDENTIFIER") _, err = Parse("foo==") - assert.EqualError(t, err, "1:5 (4): syntax error: unexpected T_EQUAL, expecting T_STRING or T_IDENTIFIER") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected T_EQUAL, expecting T_IDENTIFIER") _, err = Parse("=>foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_IDENTIFIER or \"(\"") _, err = Parse("&foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting T_IDENTIFIER or \"(\"") _, err = Parse("&&foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting 
T_IDENTIFIER or \"(\"") _, err = Parse("(&foo=bar)") - assert.EqualError(t, err, "1:2 (1): syntax error: unexpected \"&\", expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:2 (1): syntax error: unexpected \"&\", expecting T_IDENTIFIER or \"(\"") _, err = Parse("(foo=bar|)") - assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting T_IDENTIFIER or \"(\"") _, err = Parse("((((((") - assert.EqualError(t, err, "1:7 (6): syntax error: unexpected $end, expecting T_STRING or T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:7 (6): syntax error: unexpected $end, expecting T_IDENTIFIER or \"(\"") _, err = Parse("foo&bar&col=val!=val") assert.EqualError(t, err, "1:17 (16): syntax error: unexpected T_UNEQUAL") From 0cae943c1bef09a1e06f315563fe3ac34d08b43b Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 6 Nov 2023 09:32:00 +0100 Subject: [PATCH 15/24] Add test cases that triggers scan error --- internal/filter/parser_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index 1fa9e4a7..9f64c059 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -77,6 +77,13 @@ func TestParser(t *testing.T) { _, err = Parse("((0&((((((((((((((((((((((0=0)") assert.EqualError(t, err, "1:31 (30): syntax error: unexpected $end, expecting \")\"") + + // IPL web filter parser accepts such invalid strings, but our Lexer doesn't. 
+ _, err = Parse("foo\x00") + assert.EqualError(t, err, "1:1 (0): invalid character NUL") + + _, err = Parse("\xff") + assert.EqualError(t, err, "0:0 (0): invalid UTF-8 encoding") }) t.Run("ParseAllKindOfSimpleFilters", func(t *testing.T) { From 5d6c645835102268a3f9890011fa6ada566d2366 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 6 Nov 2023 09:33:00 +0100 Subject: [PATCH 16/24] Url decode `identifier`s in place & cleanup some type/token defs --- internal/filter/parser.go | 82 ++++++++++++++++++++++----------------- internal/filter/parser.y | 44 +++++++++++---------- 2 files changed, 70 insertions(+), 56 deletions(-) diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 8de28aa1..fa2dc7a7 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -6,6 +6,10 @@ package filter import __yyfmt__ "fmt" +//line parser.y:3 + +import "net/url" + // reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). // When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not // of type filter.All, this will just create a new chain with the new op and append all the filter rules to it. @@ -15,8 +19,6 @@ import __yyfmt__ "fmt" // The first argument `rule` is supposed to be a filter.Any Chain containing the first two conditions. // We then call this function when the parser is processing the logical `&` op and the Unlike condition, // and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. 
-// -//line parser.y:3 func reduceFilter(rule Filter, op string, rules ...Filter) Filter { chain, ok := rule.(*Chain) if ok && chain.op == Any && LogicalOp(op) == All { @@ -47,7 +49,7 @@ func reduceFilter(rule Filter, op string, rules ...Filter) Filter { return chain } -//line parser.y:45 +//line parser.y:47 type yySymType struct { yys int expr Filter @@ -70,21 +72,17 @@ var yyToknames = [...]string{ "error", "$unk", "T_EQUAL", - "\"=\"", "T_UNEQUAL", - "\"!\"", "T_LIKE", - "\"~\"", "T_UNLIKE", "T_LESS_THAN", - "\"<\"", "T_GREATER_THAN", - "\">\"", "T_LESS_THAN_OR_EQUAL", "T_GREATER_THAN_OR_EQUAL", "T_IDENTIFIER", "\"|\"", "\"&\"", + "\"!\"", "PREFER_SHIFTING_LOGICAL_OP", "\"(\"", "\")\"", @@ -96,7 +94,7 @@ const yyEofCode = 1 const yyErrCode = 2 const yyInitialStackSize = 16 -//line parser.y:177 +//line parser.y:181 //line yacctab:1 var yyExca = [...]int8{ @@ -110,22 +108,22 @@ const yyPrivate = 57344 const yyLast = 32 var yyAct = [...]int8{ - 22, 30, 23, 14, 24, 16, 25, 26, 4, 28, - 16, 27, 29, 2, 13, 9, 8, 6, 7, 18, - 19, 17, 10, 11, 5, 31, 21, 20, 15, 12, + 14, 30, 22, 23, 24, 25, 26, 28, 27, 29, + 6, 16, 2, 9, 8, 16, 13, 4, 5, 7, + 17, 21, 31, 10, 11, 15, 20, 12, 18, 19, 3, 1, } var yyPact = [...]int16{ - 10, -1000, -3, -3, -3, -7, -1000, 10, -1000, -1000, - 10, 10, -1000, 10, -4, -1000, -1000, -1000, -1000, -1000, - -21, -12, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -5, -1000, 0, 0, 0, -1, -1000, -5, -1000, -1000, + -5, -5, -1000, -5, -2, -1000, -1000, -1000, -1000, -1000, + -17, 3, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, } var yyPgo = [...]int8{ - 0, 31, 13, 30, 8, 29, 28, 26, 24, 3, - 18, + 0, 31, 12, 30, 17, 27, 25, 21, 18, 0, + 19, } var yyR1 = [...]int8{ @@ -141,10 +139,10 @@ var yyR2 = [...]int8{ } var yyChk = [...]int16{ - -1000, -1, -2, -3, -4, -8, 7, -10, 19, 18, - -10, -10, -5, 21, -9, -6, 17, -2, -4, -4, - -2, -7, 4, 6, 8, 10, 11, 15, 13, 16, - 22, -9, + -1000, -1, -2, -3, -4, -8, 15, -10, 14, 13, + 
-10, -10, -5, 17, -9, -6, 12, -2, -4, -4, + -2, -7, 4, 5, 6, 7, 8, 10, 9, 11, + 18, -9, } var yyDef = [...]int8{ @@ -158,21 +156,21 @@ var yyTok1 = [...]int8{ 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 7, 3, 3, 3, 3, 19, 3, - 21, 22, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 15, 3, 3, 3, 3, 14, 3, + 17, 18, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 12, 5, 14, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 18, 3, 9, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 13, } var yyTok2 = [...]int8{ - 2, 3, 4, 6, 8, 10, 11, 13, 15, 16, - 17, 20, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 16, } var yyTok3 = [...]int8{ @@ -518,32 +516,32 @@ yydefault: case 1: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:97 +//line parser.y:92 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) yylex.(*Lexer).rule = yyVAL.expr } case 2: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:102 +//line parser.y:97 { yylex.(*Lexer).rule = yyVAL.expr } case 3: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:108 +//line parser.y:103 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) } case 5: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:115 +//line parser.y:110 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) } case 7: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.y:122 +//line parser.y:117 { if yyDollar[1].text != "" { // NewChain is only going to return an error if an invalid operator is specified, and since @@ -555,13 +553,13 @@ yydefault: } case 8: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:134 +//line parser.y:129 { yyVAL.expr = yyDollar[2].expr } case 9: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:138 +//line parser.y:133 { cond, err := NewCondition(yyDollar[1].text, 
CompOperator(yyDollar[2].text), yyDollar[3].text) if err != nil { @@ -573,13 +571,25 @@ yydefault: } case 11: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:151 +//line parser.y:146 { yyVAL.expr = NewExists(yyDollar[1].text) } + case 12: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.y:152 + { + column, err := url.QueryUnescape(yyDollar[1].text) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } + + yyVAL.text = column + } case 13: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.y:159 +//line parser.y:163 { yyVAL.text = "" } diff --git a/internal/filter/parser.y b/internal/filter/parser.y index 8f8c471c..cbfe6f3d 100644 --- a/internal/filter/parser.y +++ b/internal/filter/parser.y @@ -2,6 +2,8 @@ package filter +import "net/url" + // reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). // When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not // of type filter.All, this will just create a new chain with the new op and append all the filter rules to it. @@ -59,26 +61,18 @@ func reduceFilter(rule Filter, op string, rules ...Filter) Filter { %type identifier %type logical_op -%token T_EQUAL "=" -%token T_UNEQUAL "!" T_EQUAL -%token T_LIKE "~" -%token T_UNLIKE "!" T_LIKE -%token T_LESS_THAN "<" -%token T_GREATER_THAN ">" -%token T_LESS_THAN_OR_EQUAL "<" T_EQUAL -%token T_GREATER_THAN_OR_EQUAL ">" T_EQUAL +%token T_EQUAL +%token T_UNEQUAL +%token T_LIKE +%token T_UNLIKE +%token T_LESS_THAN +%token T_GREATER_THAN +%token T_LESS_THAN_OR_EQUAL +%token T_GREATER_THAN_OR_EQUAL %token T_IDENTIFIER -%type T_EQUAL -%type T_UNEQUAL -%type T_LIKE -%type T_UNLIKE -%type T_LESS_THAN -%type T_GREATER_THAN -%type T_LESS_THAN_OR_EQUAL -%type T_GREATER_THAN_OR_EQUAL - -%type "|" "&" "!" +%type "|" "&" +%type "!" 
// This is just used for declaring explicit precedence and resolves shift/reduce conflicts // in `filter_chain` and `conditions_expr` rules. @@ -87,7 +81,8 @@ func reduceFilter(rule Filter, op string, rules ...Filter) Filter { %nonassoc T_EQUAL T_UNEQUAL T_LIKE T_UNLIKE %nonassoc T_LESS_THAN T_LESS_THAN_OR_EQUAL T_GREATER_THAN T_GREATER_THAN_OR_EQUAL -%left "!" "&" "|" +%left "|" "&" +%left "!" %left "(" %right ")" @@ -154,9 +149,18 @@ exists_expr: identifier ; identifier: T_IDENTIFIER + { + column, err := url.QueryUnescape($1) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } + + $$ = column + } ; -optional_negation: /* empty */ { $$ = "" } +optional_negation: /* empty */ { $$ = "" } | "!" ; From 0ca652db100b37afcaef5194a2b0143c5ecbbefe Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 6 Nov 2023 09:34:00 +0100 Subject: [PATCH 17/24] Resolve `filter_rule` in brackets instead of `filter_chain` --- internal/filter/parser.go | 12 ++++++------ internal/filter/parser.y | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/filter/parser.go b/internal/filter/parser.go index fa2dc7a7..d2897f87 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -109,9 +109,9 @@ const yyLast = 32 var yyAct = [...]int8{ 14, 30, 22, 23, 24, 25, 26, 28, 27, 29, - 6, 16, 2, 9, 8, 16, 13, 4, 5, 7, - 17, 21, 31, 10, 11, 15, 20, 12, 18, 19, - 3, 1, + 6, 16, 1, 9, 8, 16, 13, 4, 5, 2, + 7, 21, 31, 15, 10, 11, 20, 17, 18, 19, + 12, 3, } var yyPact = [...]int16{ @@ -122,8 +122,8 @@ var yyPact = [...]int16{ } var yyPgo = [...]int8{ - 0, 31, 12, 30, 17, 27, 25, 21, 18, 0, - 19, + 0, 12, 19, 31, 17, 30, 23, 21, 18, 0, + 20, } var yyR1 = [...]int8{ @@ -141,7 +141,7 @@ var yyR2 = [...]int8{ var yyChk = [...]int16{ -1000, -1, -2, -3, -4, -8, 15, -10, 14, 13, -10, -10, -5, 17, -9, -6, 12, -2, -4, -4, - -2, -7, 4, 5, 6, 7, 8, 10, 9, 11, + -1, -7, 4, 5, 6, 7, 8, 10, 9, 11, 18, -9, } 
diff --git a/internal/filter/parser.y b/internal/filter/parser.y index cbfe6f3d..954b2f4e 100644 --- a/internal/filter/parser.y +++ b/internal/filter/parser.y @@ -125,7 +125,7 @@ maybe_negated_condition_expr: optional_negation condition_expr } ; -condition_expr: "(" filter_chain ")" +condition_expr: "(" filter_rule ")" { $$ = $2 } From c627cd7fa377c18734e06349be7a3507203493d6 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 6 Nov 2023 09:35:00 +0100 Subject: [PATCH 18/24] Provide token friendly names in err messages --- internal/filter/lexer.go | 20 ++++++++++++++++++++ internal/filter/parser_test.go | 32 ++++++++++++++++---------------- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go index a3b0f1cd..f6f0e798 100644 --- a/internal/filter/lexer.go +++ b/internal/filter/lexer.go @@ -14,6 +14,21 @@ import ( // Currently, it allows to match any character except a LogicalOp and CompOperator. var identifiersMatcher = regexp.MustCompile("[^!&|~<>=()]") +// tokenDisplayNames contains a list of all the defined parser tokens and their respective +// friendly names used to output in error messages. +var tokenDisplayNames = map[string]string{ + "$unk": `"unknown"`, + "T_EQUAL": `"="`, + "T_UNEQUAL": `"!="`, + "T_LIKE": `"~"`, + "T_UNLIKE": `"!~"`, + "T_LESS_THAN": `"<"`, + "T_GREATER_THAN": `">"`, + "T_LESS_THAN_OR_EQUAL": `"<="`, + "T_GREATER_THAN_OR_EQUAL": `">="`, + "T_IDENTIFIER": `"column or value"`, +} + // init just sets the global yyErrorVerbose variable to true. func init() { // Enable parsers error verbose to get more context of the parsing failures @@ -139,6 +154,11 @@ func (l *Lexer) Lex(yyval *yySymType) int { // additional context instead. This function then wraps the provided err and adds line, column number and offset // to the error string. Error is equivalent to "yyerror" in the original yacc. 
func (l *Lexer) Error(s string) { + // Replace all parser token names by their corresponding friendly names. + for token, name := range tokenDisplayNames { + s = strings.ReplaceAll(s, token, name) + } + l.err = fmt.Errorf("%d:%d (%d): %s", l.Line, l.Column, l.Offset, s) // Always reset the current filter rule when encountering an error. diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index 9f64c059..0f38e32d 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -13,13 +13,13 @@ func TestParser(t *testing.T) { t.Parallel() _, err := Parse("(a=b|c=d)e=f") - assert.EqualError(t, err, "1:10 (9): syntax error: unexpected T_IDENTIFIER") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \"column or value\"") _, err = Parse("(a=b|c=d|)e=f") - assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") _, err = Parse("col=(") - assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"(\", expecting T_IDENTIFIER") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"(\", expecting \"column or value\"") _, err = Parse("(((x=a)&y=b") assert.EqualError(t, err, "1:12 (11): syntax error: unexpected $end, expecting \")\"") @@ -28,10 +28,10 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\"") _, err = Parse("!(&") - assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \"&\", expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") _, err = Parse("foo&bar=(te(st)") - assert.EqualError(t, err, "1:9 (8): syntax error: unexpected \"(\", expecting T_IDENTIFIER") + assert.EqualError(t, err, "1:9 (8): syntax error: unexpected \"(\", expecting \"column or value\"") _, err = Parse("foo&bar=te(st)") 
assert.EqualError(t, err, "1:11 (10): syntax error: unexpected \"(\"") @@ -40,37 +40,37 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:13 (12): syntax error: unexpected \")\"") _, err = Parse("!()|&()&)") - assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \")\", expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") _, err = Parse("=foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"=\", expecting \"column or value\" or \"(\"") _, err = Parse("foo>") - assert.EqualError(t, err, "1:5 (4): syntax error: unexpected $end, expecting T_IDENTIFIER") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected $end, expecting \"column or value\"") _, err = Parse("foo==") - assert.EqualError(t, err, "1:5 (4): syntax error: unexpected T_EQUAL, expecting T_IDENTIFIER") + assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"=\", expecting \"column or value\"") _, err = Parse("=>foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected T_EQUAL, expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"=\", expecting \"column or value\" or \"(\"") _, err = Parse("&foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") _, err = Parse("&&foo") - assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:1 (0): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") _, err = Parse("(&foo=bar)") - assert.EqualError(t, err, "1:2 (1): syntax error: unexpected \"&\", expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, 
"1:2 (1): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") _, err = Parse("(foo=bar|)") - assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") _, err = Parse("((((((") - assert.EqualError(t, err, "1:7 (6): syntax error: unexpected $end, expecting T_IDENTIFIER or \"(\"") + assert.EqualError(t, err, "1:7 (6): syntax error: unexpected $end, expecting \"column or value\" or \"(\"") _, err = Parse("foo&bar&col=val!=val") - assert.EqualError(t, err, "1:17 (16): syntax error: unexpected T_UNEQUAL") + assert.EqualError(t, err, "1:17 (16): syntax error: unexpected \"!=\"") _, err = Parse("col%7umn") assert.EqualError(t, err, "invalid URL escape \"%7u\"") From 1b70c7843e9c4f322404061e636a0f1c858591d5 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 6 Nov 2023 09:36:00 +0100 Subject: [PATCH 19/24] Fully & correctly resolve complex filter strings --- internal/filter/parser.go | 151 ++++++++++++++++++++------------- internal/filter/parser.y | 73 +++++++++++----- internal/filter/parser_test.go | 72 +++++++++++----- internal/filter/types.go | 4 + 4 files changed, 203 insertions(+), 97 deletions(-) diff --git a/internal/filter/parser.go b/internal/filter/parser.go index d2897f87..9b2d03f5 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -8,7 +8,10 @@ import __yyfmt__ "fmt" //line parser.y:3 -import "net/url" +import ( + "net/url" + "slices" +) // reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). // When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not @@ -16,40 +19,64 @@ import "net/url" // Otherwise, it will pop the last pushed rule of that chain (first argument) and append it to the new *And chain. 
// // Example: `foo=bar|bar~foo&col!~val` -// The first argument `rule` is supposed to be a filter.Any Chain containing the first two conditions. +// The first argument `left` is supposed to be a filter.Any Chain containing the first two conditions. // We then call this function when the parser is processing the logical `&` op and the Unlike condition, // and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. -func reduceFilter(rule Filter, op string, rules ...Filter) Filter { - chain, ok := rule.(*Chain) +func reduceFilter(left Filter, op string, right Filter) Filter { + chain, ok := left.(*Chain) if ok && chain.op == Any && LogicalOp(op) == All { - // Retrieve the last pushed condition and append it to the new "And" chain instead - andChain, _ := NewChain(All, chain.pop()) - andChain.add(rules...) + // Retrieve the last pushed filter Condition and append it to the new "And" chain instead + back := chain.pop() + // Chain#pop can return a filter Chain, and since we are only allowed to regroup two filter conditions, + // we must traverse the last element of every single popped Chain till we reach a filter condition. + for back != nil { + if backChain, ok := back.(*Chain); !ok || backChain.grouped { + // If the popped element is not of type filter Chain or the filter chain is parenthesized, + // we don't need to continue here, so break out of the loop. + break + } - chain.add(andChain) + // Re-add the just popped item before stepping into it and popping its last item. + chain.add(back) - return chain + chain = back.(*Chain) + back = chain.pop() + } + + andChain, _ := NewChain(All, back) + // We don't need to regroup an already grouped filter chain, since braces gain + // a higher precedence than any logical operators. 
+ if anyChain, ok := right.(*Chain); ok && anyChain.op == Any && !chain.grouped && !anyChain.grouped { + andChain.add(anyChain.top()) + // Prepend the newly created All chain + anyChain.rules = slices.Insert[[]Filter, Filter](anyChain.rules, 0, andChain) + chain.add(anyChain) + } else { + andChain.add(right) + chain.add(andChain) + } + + return left } - // If the given operator is the same as the already existsing chains operator (*chain), + // If the given operator is the same as the already existing chains operator (*chain), // we don't need to create another chain of the same operator type. Avoids something // like &Chain{op: All, &Chain{op: All, ...}} if chain == nil || chain.op != LogicalOp(op) { - newChain, err := NewChain(LogicalOp(op), rule) + var err error + chain, err = NewChain(LogicalOp(op), left) if err != nil { // Just panic, filter.Parse will try to recover from this. panic(err) } - - chain = newChain } - chain.add(rules...) + chain.add(right) return chain } -//line parser.y:47 +//line parser.y:74 type yySymType struct { yys int expr Filter @@ -94,7 +121,7 @@ const yyEofCode = 1 const yyErrCode = 2 const yyInitialStackSize = 16 -//line parser.y:181 +//line parser.y:216 //line yacctab:1 var yyExca = [...]int8{ @@ -105,51 +132,51 @@ var yyExca = [...]int8{ const yyPrivate = 57344 -const yyLast = 32 +const yyLast = 37 var yyAct = [...]int8{ - 14, 30, 22, 23, 24, 25, 26, 28, 27, 29, - 6, 16, 1, 9, 8, 16, 13, 4, 5, 2, - 7, 21, 31, 15, 10, 11, 20, 17, 18, 19, - 12, 3, + 15, 9, 8, 17, 6, 1, 32, 24, 25, 26, + 27, 28, 30, 29, 31, 4, 17, 5, 9, 8, + 22, 14, 2, 23, 33, 16, 13, 20, 21, 3, + 18, 7, 0, 19, 10, 11, 12, } var yyPact = [...]int16{ - -5, -1000, 0, 0, 0, -1, -1000, -5, -1000, -1000, - -5, -5, -1000, -5, -2, -1000, -1000, -1000, -1000, -1000, - -17, 3, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, + -11, 5, 5, 5, 5, 4, -1000, -11, -1000, -1000, + -11, -11, -11, -1000, -11, 3, -1000, -1000, -1000, -1000, + -1000, -1000, -12, -9, 
-1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, -1000, } var yyPgo = [...]int8{ - 0, 12, 19, 31, 17, 30, 23, 21, 18, 0, - 20, + 0, 5, 22, 29, 15, 26, 25, 23, 17, 0, + 31, } var yyR1 = [...]int8{ - 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, - 5, 6, 9, 8, 8, 10, 10, 7, 7, 7, - 7, 7, 7, 7, 7, + 0, 1, 1, 1, 2, 2, 3, 3, 4, 5, + 5, 5, 6, 9, 8, 8, 10, 10, 7, 7, + 7, 7, 7, 7, 7, 7, } var yyR2 = [...]int8{ - 0, 3, 1, 3, 1, 3, 1, 2, 3, 3, - 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, + 0, 3, 1, 3, 3, 1, 3, 1, 2, 3, + 3, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, } var yyChk = [...]int16{ -1000, -1, -2, -3, -4, -8, 15, -10, 14, 13, - -10, -10, -5, 17, -9, -6, 12, -2, -4, -4, - -1, -7, 4, 5, 6, 7, 8, 10, 9, 11, - 18, -9, + -10, -10, -10, -5, 17, -9, -6, 12, -2, -2, + -4, -4, -1, -7, 4, 5, 6, 7, 8, 10, + 9, 11, 18, -9, } var yyDef = [...]int8{ - 13, -2, 2, 4, 6, 0, 14, 13, 15, 16, - 13, 13, 7, 13, 11, 10, 12, 1, 3, 5, - 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, - 8, 9, + 14, -2, 2, 5, 7, 0, 15, 14, 16, 17, + 14, 14, 14, 8, 14, 12, 11, 13, 3, 1, + 4, 6, 0, 0, 18, 19, 20, 21, 22, 23, + 24, 25, 9, 10, } var yyTok1 = [...]int8{ @@ -516,32 +543,39 @@ yydefault: case 1: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:92 +//line parser.y:119 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) yylex.(*Lexer).rule = yyVAL.expr } case 2: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:97 +//line parser.y:124 { yylex.(*Lexer).rule = yyVAL.expr } case 3: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:103 +//line parser.y:128 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr } - case 5: + case 4: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:110 +//line parser.y:135 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) } - case 7: + case 6: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:142 + { + yyVAL.expr = reduceFilter(yyDollar[1].expr, 
yyDollar[2].text, yyDollar[3].expr) + } + case 8: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.y:117 +//line parser.y:149 { if yyDollar[1].text != "" { // NewChain is only going to return an error if an invalid operator is specified, and since @@ -551,15 +585,18 @@ yydefault: yyVAL.expr = yyDollar[2].expr } } - case 8: + case 9: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:129 +//line parser.y:161 { yyVAL.expr = yyDollar[2].expr + if chain, ok := yyVAL.expr.(*Chain); ok { + chain.grouped = true + } } - case 9: + case 10: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:133 +//line parser.y:168 { cond, err := NewCondition(yyDollar[1].text, CompOperator(yyDollar[2].text), yyDollar[3].text) if err != nil { @@ -569,15 +606,15 @@ yydefault: yyVAL.expr = cond } - case 11: + case 12: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:146 +//line parser.y:181 { yyVAL.expr = NewExists(yyDollar[1].text) } - case 12: + case 13: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:152 +//line parser.y:187 { column, err := url.QueryUnescape(yyDollar[1].text) if err != nil { @@ -587,9 +624,9 @@ yydefault: yyVAL.text = column } - case 13: + case 14: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.y:163 +//line parser.y:198 { yyVAL.text = "" } diff --git a/internal/filter/parser.y b/internal/filter/parser.y index 954b2f4e..6a0df916 100644 --- a/internal/filter/parser.y +++ b/internal/filter/parser.y @@ -2,7 +2,10 @@ package filter -import "net/url" +import ( + "net/url" + "slices" +) // reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). // When the operator type of the first argument (Filter) is not of type filter.Any or the given operator is not @@ -10,35 +13,59 @@ import "net/url" // Otherwise, it will pop the last pushed rule of that chain (first argument) and append it to the new *And chain. 
// // Example: `foo=bar|bar~foo&col!~val` -// The first argument `rule` is supposed to be a filter.Any Chain containing the first two conditions. +// The first argument `left` is supposed to be a filter.Any Chain containing the first two conditions. // We then call this function when the parser is processing the logical `&` op and the Unlike condition, // and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. -func reduceFilter(rule Filter, op string, rules ...Filter) Filter { - chain, ok := rule.(*Chain); +func reduceFilter(left Filter, op string, right Filter) Filter { + chain, ok := left.(*Chain) if ok && chain.op == Any && LogicalOp(op) == All { - // Retrieve the last pushed condition and append it to the new "And" chain instead - andChain, _ := NewChain(All, chain.pop()) - andChain.add(rules...) + // Retrieve the last pushed filter Condition and append it to the new "And" chain instead + back := chain.pop() + // Chain#pop can return a filter Chain, and since we are only allowed to regroup two filter conditions, + // we must traverse the last element of every single popped Chain till we reach a filter condition. + for back != nil { + if backChain, ok := back.(*Chain); !ok || backChain.grouped { + // If the popped element is not of type filter Chain or the filter chain is parenthesized, + // we don't need to continue here, so break out of the loop. + break + } + + // Re-add the just popped item before stepping into it and popping its last item. + chain.add(back) + + chain = back.(*Chain) + back = chain.pop() + } - chain.add(andChain) + andChain, _ := NewChain(All, back) + // We don't need to regroup an already grouped filter chain, since braces gain + // a higher precedence than any logical operators. 
+ if anyChain, ok := right.(*Chain); ok && anyChain.op == Any && !chain.grouped && !anyChain.grouped { + andChain.add(anyChain.top()) + // Prepend the newly created All chain + anyChain.rules = slices.Insert[[]Filter, Filter](anyChain.rules, 0, andChain) + chain.add(anyChain) + } else { + andChain.add(right) + chain.add(andChain) + } - return chain + return left } - // If the given operator is the same as the already existsing chains operator (*chain), + // If the given operator is the same as the already existing chains operator (*chain), // we don't need to create another chain of the same operator type. Avoids something // like &Chain{op: All, &Chain{op: All, ...}} if chain == nil || chain.op != LogicalOp(op) { - newChain, err := NewChain(LogicalOp(op), rule) - if err != nil { - // Just panic, filter.Parse will try to recover from this. - panic(err) - } - - chain = newChain + var err error + chain, err = NewChain(LogicalOp(op), left) + if err != nil { + // Just panic, filter.Parse will try to recover from this. + panic(err) + } } - chain.add(rules...) 
+ chain.add(right) return chain } @@ -93,10 +120,15 @@ filter_rule: filter_chain logical_op filter_chain $$ = reduceFilter($1, $2, $3) yylex.(*Lexer).rule = $$ } - | filter_chain + | filter_chain %prec PREFER_SHIFTING_LOGICAL_OP { yylex.(*Lexer).rule = $$ } + | filter_rule logical_op filter_chain + { + $$ = reduceFilter($1, $2, $3) + yylex.(*Lexer).rule = $$ + } ; filter_chain: conditions_expr logical_op maybe_negated_condition_expr @@ -128,6 +160,9 @@ maybe_negated_condition_expr: optional_negation condition_expr condition_expr: "(" filter_rule ")" { $$ = $2 + if chain, ok := $$.(*Chain); ok { + chain.grouped = true + } } | identifier comparison_op identifier { diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index 0f38e32d..45250362 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -13,7 +13,7 @@ func TestParser(t *testing.T) { t.Parallel() _, err := Parse("(a=b|c=d)e=f") - assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \"column or value\"") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \"column or value\", expecting \"|\" or \"&\"") _, err = Parse("(a=b|c=d|)e=f") assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") @@ -22,10 +22,10 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:5 (4): syntax error: unexpected \"(\", expecting \"column or value\"") _, err = Parse("(((x=a)&y=b") - assert.EqualError(t, err, "1:12 (11): syntax error: unexpected $end, expecting \")\"") + assert.EqualError(t, err, "1:12 (11): syntax error: unexpected $end, expecting \"|\" or \"&\" or \")\"") _, err = Parse("(x=a)&y=b)") - assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\"") + assert.EqualError(t, err, "1:10 (9): syntax error: unexpected \")\", expecting \"|\" or \"&\"") _, err = Parse("!(&") assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \"&\", expecting \"column or value\" or \"(\"") @@ 
-34,10 +34,10 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:9 (8): syntax error: unexpected \"(\", expecting \"column or value\"") _, err = Parse("foo&bar=te(st)") - assert.EqualError(t, err, "1:11 (10): syntax error: unexpected \"(\"") + assert.EqualError(t, err, "1:11 (10): syntax error: unexpected \"(\", expecting \"|\" or \"&\"") _, err = Parse("foo&bar=test)") - assert.EqualError(t, err, "1:13 (12): syntax error: unexpected \")\"") + assert.EqualError(t, err, "1:13 (12): syntax error: unexpected \")\", expecting \"|\" or \"&\"") _, err = Parse("!()|&()&)") assert.EqualError(t, err, "1:3 (2): syntax error: unexpected \")\", expecting \"column or value\" or \"(\"") @@ -70,13 +70,13 @@ func TestParser(t *testing.T) { assert.EqualError(t, err, "1:7 (6): syntax error: unexpected $end, expecting \"column or value\" or \"(\"") _, err = Parse("foo&bar&col=val!=val") - assert.EqualError(t, err, "1:17 (16): syntax error: unexpected \"!=\"") + assert.EqualError(t, err, "1:17 (16): syntax error: unexpected \"!=\", expecting \"|\" or \"&\"") _, err = Parse("col%7umn") assert.EqualError(t, err, "invalid URL escape \"%7u\"") _, err = Parse("((0&((((((((((((((((((((((0=0)") - assert.EqualError(t, err, "1:31 (30): syntax error: unexpected $end, expecting \")\"") + assert.EqualError(t, err, "1:31 (30): syntax error: unexpected $end, expecting \"|\" or \"&\" or \")\"") // IPL web filter parser accepts such invalid strings, but our Lexer doesn't. 
_, err = Parse("foo\x00") @@ -193,7 +193,7 @@ func TestParser(t *testing.T) { rule, err = Parse("(!foo=bar)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} + expected = &Chain{op: None, grouped: true, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}} assert.Equal(t, expected, rule) rule, err = Parse("!(foo=bar)") @@ -204,7 +204,7 @@ func TestParser(t *testing.T) { rule, err = Parse("!(!foo=bar)") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Chain{op: None, rules: []Filter{ - &Chain{op: None, rules: []Filter{ + &Chain{op: None, grouped: true, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, }}, }} @@ -213,7 +213,7 @@ func TestParser(t *testing.T) { rule, err = Parse("!(foo=bar|bar=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Chain{op: None, rules: []Filter{ - &Chain{op: Any, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, @@ -222,8 +222,8 @@ func TestParser(t *testing.T) { rule, err = Parse("((!foo=bar)&bar!=foo)") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected = &Chain{op: All, rules: []Filter{ - &Chain{op: None, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}}, + expected = &Chain{op: All, grouped: true, rules: []Filter{ + &Chain{op: None, grouped: true, rules: []Filter{&Condition{op: Equal, column: "foo", value: "bar"}}}, &Condition{op: UnEqual, column: "bar", value: "foo"}, }} assert.Equal(t, expected, rule) @@ -239,7 +239,7 @@ func TestParser(t *testing.T) { rule, err = Parse("!(!foo|bar)") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Chain{op: None, rules: []Filter{ - &Chain{op: Any, rules: []Filter{ + &Chain{op: Any, grouped: true, 
rules: []Filter{ &Chain{op: None, rules: []Filter{&Exists{column: "foo"}}}, &Exists{column: "bar"}, }}, @@ -249,8 +249,8 @@ func TestParser(t *testing.T) { rule, err = Parse("!(!(foo|bar))") assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Chain{op: None, rules: []Filter{ - &Chain{op: None, rules: []Filter{ - &Chain{op: Any, rules: []Filter{ + &Chain{op: None, grouped: true, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ &Exists{column: "foo"}, &Exists{column: "bar"}}, }, @@ -270,12 +270,12 @@ func TestParser(t *testing.T) { assert.Nil(t, err, "There should be no errors but got: %s", err) expected = &Chain{op: All, rules: []Filter{ &Chain{op: None, rules: []Filter{ - &Chain{op: Any, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ &Condition{op: Equal, column: "foo", value: "bar"}, &Condition{op: Equal, column: "bar", value: "foo"}, }}, }}, - &Chain{op: Any, rules: []Filter{ + &Chain{op: Any, grouped: true, rules: []Filter{ &Condition{op: UnEqual, column: "foo", value: "bar"}, &Condition{op: UnEqual, column: "bar", value: "foo"}, }}, @@ -301,6 +301,36 @@ func TestParser(t *testing.T) { }} assert.Equal(t, expected, rule) + rule, err = Parse("foo!~bar&bar~foo|col=val&val!~col|col~val&yes!=no|yes~no&no~yes|foo&!test") + assert.Nil(t, err, "There should be no errors but got: %s", err) + expected = &Chain{op: Any, rules: []Filter{ + &Chain{op: All, rules: []Filter{ + &Condition{op: UnLike, column: "foo", value: "bar"}, + &Condition{op: Like, column: "bar", value: "foo"}, + }}, + &Chain{op: Any, rules: []Filter{ + &Chain{op: All, rules: []Filter{ + &Condition{op: Equal, column: "col", value: "val"}, + &Condition{op: UnLike, column: "val", value: "col"}, + }}, + &Chain{op: All, rules: []Filter{ + &Condition{op: Like, column: "col", value: "val"}, + &Condition{op: UnEqual, column: "yes", value: "no"}, + }}, + }}, + &Chain{op: Any, rules: []Filter{ + &Chain{op: All, rules: []Filter{ + &Condition{op: Like, 
column: "yes", value: "no"}, + &Condition{op: Like, column: "no", value: "yes"}, + }}, + &Chain{op: All, rules: []Filter{ + NewExists("foo"), + &Chain{op: None, rules: []Filter{NewExists("test")}}, + }}, + }}, + }} + assert.Equal(t, expected, rule) + rule, err = Parse("foo=bar&bar!=foo&(john>doe|doe Date: Mon, 6 Nov 2023 09:37:00 +0100 Subject: [PATCH 20/24] Ignore auto generated parser actions file --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 84c048a7..3b9f9945 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ /build/ + +# Exclude the autogenerated parser.output file +parser.output From cef24782e4017b61876e4bde75305220175ec199 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 20 Nov 2023 16:31:39 +0100 Subject: [PATCH 21/24] Add test case for all the filter evaluation methods --- internal/filter/filter_test.go | 140 +++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 internal/filter/filter_test.go diff --git a/internal/filter/filter_test.go b/internal/filter/filter_test.go new file mode 100644 index 00000000..aa7048ed --- /dev/null +++ b/internal/filter/filter_test.go @@ -0,0 +1,140 @@ +package filter + +import ( + "errors" + "github.com/stretchr/testify/assert" + "regexp" + "strings" + "testing" +) + +const unknown string = "unknown" + +var evalError = errors.New("evaluation error") + +func TestFilter(t *testing.T) { + t.Parallel() + + filterable := &filterableType{ + key: "domain", + value: "example.com", + } + + t.Run("InvalidOperator", func(t *testing.T) { + chain, err := NewChain("unknown", nil) + assert.Nil(t, chain) + assert.EqualError(t, err, "invalid logical operator provided: \"unknown\"") + + condition, err := NewCondition("column", "unknown", "value") + assert.Nil(t, condition) + assert.EqualError(t, err, "invalid comparison operator provided: \"unknown\"") + }) + + t.Run("EvaluationError", func(t *testing.T) { + t.Parallel() + + testInvalidData := 
[]struct { + Expression string + }{ + {"domain=" + unknown}, + {"domain!=" + unknown}, + {"domain<" + unknown}, + {"domain<=" + unknown}, + {"domain>" + unknown}, + {"domain>=" + unknown}, + {"domain~" + unknown}, + {"domain!~" + unknown}, + {"!(domain!=" + unknown + ")"}, + {"domain=" + unknown + "&domain<=test.example.com"}, + {"domain<=" + unknown + "|domain<=test.example.com"}, + } + + for _, td := range testInvalidData { + f, err := Parse(td.Expression) + assert.NoError(t, err) + + matched, err := f.Eval(filterable) + assert.EqualError(t, err, evalError.Error()) + assert.Equal(t, matched, false, "unexpected filter result for %q", td.Expression) + } + }) + + t.Run("EvaluateFilter", func(t *testing.T) { + t.Parallel() + + testdata := []struct { + Expression string + Expected bool + }{ + {"domain=example.com", true}, + {"domain!=example.com", false}, + {"domain=test.example.com", false}, + {"name!=example.com", false}, + {"domain", true}, + {"name", false}, + {"display_name", false}, + {"!name", true}, + {"domain~example*", true}, + {"domain!~example*", false}, + {"domain~example*&!domain", false}, + {"domain>a", true}, + {"domainz", false}, + {"domain=example&domain<=test.example.com", true}, + {"domain<=example|domain<=test.example.com", true}, + {"domain<=example|domain>=test.example.com", false}, + } + + for _, td := range testdata { + f, err := Parse(td.Expression) + if assert.NoError(t, err, "parsing %q should not return an error", td.Expression) { + matched, err := f.Eval(filterable) + assert.NoError(t, err) + assert.Equal(t, td.Expected, matched, "unexpected filter result for %q", td.Expression) + } + } + }) +} + +type filterableType struct { + key string + value string +} + +func (f *filterableType) EvalEqual(_ string, value string) (bool, error) { + if value == unknown { + return false, evalError + } + + return strings.EqualFold(f.value, value), nil +} + +func (f *filterableType) EvalLess(_ string, value string) (bool, error) { + if value == unknown { + 
return false, evalError
+	}
+
+	return f.value < value, nil
+}
+
+func (f *filterableType) EvalLike(_ string, value string) (bool, error) {
+	if value == unknown {
+		return false, evalError
+	}
+
+	regex := regexp.MustCompile("^example.*$")
+	return regex.MatchString(f.value), nil
+}
+
+func (f *filterableType) EvalLessOrEqual(_ string, value string) (bool, error) {
+	if value == unknown {
+		return false, evalError
+	}
+
+	return f.value <= value, nil
+}
+
+func (f *filterableType) EvalExists(key string) bool {
+	return f.key == key
+}

From 82d3141dd67452e977203baf1b4d3af742c45f73 Mon Sep 17 00:00:00 2001
From: Yonas Habteab
Date: Wed, 10 Apr 2024 17:25:26 +0200
Subject: [PATCH 22/24] Fix mixed tab & space indentation

---
 internal/filter/parser.y | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/internal/filter/parser.y b/internal/filter/parser.y
index 6a0df916..20f9e763 100644
--- a/internal/filter/parser.y
+++ b/internal/filter/parser.y
@@ -126,8 +126,8 @@ filter_rule: filter_chain logical_op filter_chain
 	}
 	| filter_rule logical_op filter_chain
 	{
-	    $$ = reduceFilter($1, $2, $3)
-	    yylex.(*Lexer).rule = $$
+		$$ = reduceFilter($1, $2, $3)
+		yylex.(*Lexer).rule = $$
 	}
 	;
 
@@ -184,15 +184,15 @@ exists_expr: identifier
 	;
 
 identifier: T_IDENTIFIER
-	{
-	    column, err := url.QueryUnescape($1)
-	    if err != nil {
-	        // Something went wrong, so just panic and filter.Parse will try to recover from this.
+ panic(err) + } + + $$ = column + } ; optional_negation: /* empty */ { $$ = "" } From 755b6aeb11ea7571de9385cb1a4b4638a1a86baa Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Wed, 17 Apr 2024 09:45:16 +0200 Subject: [PATCH 23/24] Fix confusing yacc rule names --- internal/filter/parser.y | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/filter/parser.y b/internal/filter/parser.y index 20f9e763..c9bd14f1 100644 --- a/internal/filter/parser.y +++ b/internal/filter/parser.y @@ -77,8 +77,8 @@ func reduceFilter(left Filter, op string, right Filter) Filter { } %type filter_rule +%type filter_chain_list %type filter_chain -%type conditions_expr %type maybe_negated_condition_expr %type condition_expr %type exists_expr @@ -102,7 +102,7 @@ func reduceFilter(left Filter, op string, right Filter) Filter { %type "!" // This is just used for declaring explicit precedence and resolves shift/reduce conflicts -// in `filter_chain` and `conditions_expr` rules. +// in `filter_chain_list` and `filter_chain` rules. 
%nonassoc PREFER_SHIFTING_LOGICAL_OP %nonassoc T_EQUAL T_UNEQUAL T_LIKE T_UNLIKE @@ -115,30 +115,30 @@ func reduceFilter(left Filter, op string, right Filter) Filter { %% -filter_rule: filter_chain logical_op filter_chain +filter_rule: filter_chain_list logical_op filter_chain_list { $$ = reduceFilter($1, $2, $3) yylex.(*Lexer).rule = $$ } - | filter_chain %prec PREFER_SHIFTING_LOGICAL_OP + | filter_chain_list %prec PREFER_SHIFTING_LOGICAL_OP { yylex.(*Lexer).rule = $$ } - | filter_rule logical_op filter_chain + | filter_rule logical_op filter_chain_list { $$ = reduceFilter($1, $2, $3) yylex.(*Lexer).rule = $$ } ; -filter_chain: conditions_expr logical_op maybe_negated_condition_expr +filter_chain_list: filter_chain logical_op maybe_negated_condition_expr { $$ = reduceFilter($1, $2, $3) } - | conditions_expr %prec PREFER_SHIFTING_LOGICAL_OP + | filter_chain %prec PREFER_SHIFTING_LOGICAL_OP ; -conditions_expr: maybe_negated_condition_expr logical_op maybe_negated_condition_expr +filter_chain: maybe_negated_condition_expr logical_op maybe_negated_condition_expr { $$ = reduceFilter($1, $2, $3) } From 714258456dd1de28a1ff5de6c7e350b97b3b452b Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Wed, 14 Aug 2024 11:37:25 +0200 Subject: [PATCH 24/24] Disable `//line` directives for `goyacc` generated codes Otherwise the dwarf information of that parser will get messed up - incomplete and one will not be able to step into that source code using a debugger (tested it using delve). 
--- internal/filter/lexer.go | 2 +- internal/filter/parser.go | 23 +---------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/internal/filter/lexer.go b/internal/filter/lexer.go index f6f0e798..ff846c44 100644 --- a/internal/filter/lexer.go +++ b/internal/filter/lexer.go @@ -1,4 +1,4 @@ -//go:generate goyacc -v parser.output -o parser.go parser.y +//go:generate goyacc -l -v parser.output -o parser.go parser.y package filter diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 9b2d03f5..0c833f2a 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -1,13 +1,9 @@ -// Code generated by goyacc -v parser.output -o parser.go parser.y. DO NOT EDIT. - -//line parser.y:2 +// Code generated by goyacc -l -v parser.output -o parser.go parser.y. DO NOT EDIT. package filter import __yyfmt__ "fmt" -//line parser.y:3 - import ( "net/url" "slices" @@ -76,7 +72,6 @@ func reduceFilter(left Filter, op string, right Filter) Filter { return chain } -//line parser.y:74 type yySymType struct { yys int expr Filter @@ -121,9 +116,6 @@ const yyEofCode = 1 const yyErrCode = 2 const yyInitialStackSize = 16 -//line parser.y:216 - -//line yacctab:1 var yyExca = [...]int8{ -1, 1, 1, -1, @@ -210,8 +202,6 @@ var yyErrorMessages = [...]struct { msg string }{} -//line yaccpar:1 - /* parser for yacc output */ var ( @@ -543,39 +533,33 @@ yydefault: case 1: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:119 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) yylex.(*Lexer).rule = yyVAL.expr } case 2: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:124 { yylex.(*Lexer).rule = yyVAL.expr } case 3: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:128 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) yylex.(*Lexer).rule = yyVAL.expr } case 4: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:135 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) } 
case 6: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:142 { yyVAL.expr = reduceFilter(yyDollar[1].expr, yyDollar[2].text, yyDollar[3].expr) } case 8: yyDollar = yyS[yypt-2 : yypt+1] -//line parser.y:149 { if yyDollar[1].text != "" { // NewChain is only going to return an error if an invalid operator is specified, and since @@ -587,7 +571,6 @@ yydefault: } case 9: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:161 { yyVAL.expr = yyDollar[2].expr if chain, ok := yyVAL.expr.(*Chain); ok { @@ -596,7 +579,6 @@ yydefault: } case 10: yyDollar = yyS[yypt-3 : yypt+1] -//line parser.y:168 { cond, err := NewCondition(yyDollar[1].text, CompOperator(yyDollar[2].text), yyDollar[3].text) if err != nil { @@ -608,13 +590,11 @@ yydefault: } case 12: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:181 { yyVAL.expr = NewExists(yyDollar[1].text) } case 13: yyDollar = yyS[yypt-1 : yypt+1] -//line parser.y:187 { column, err := url.QueryUnescape(yyDollar[1].text) if err != nil { @@ -626,7 +606,6 @@ yydefault: } case 14: yyDollar = yyS[yypt-0 : yypt+1] -//line parser.y:198 { yyVAL.text = "" }