From 8b2c4eb9b442f9193bf4e996524376e1d2bdb0f6 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Mon, 23 Oct 2023 12:25:42 +0200 Subject: [PATCH] Add auto-generated goyacc filter `Parser` --- internal/filter/parser.go | 778 +++++++++++++++++++++++++------------- 1 file changed, 508 insertions(+), 270 deletions(-) diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 1ce642bf7..a5ee5ec76 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -1,357 +1,595 @@ -package filter +// Code generated by goyacc -v parser.output -o parser.go parser.y. DO NOT EDIT. -import ( - "fmt" - "net/url" - "strings" -) +//line parser.y:2 -type Parser struct { - tag string - pos, length, openParenthesis int -} +package filter -// Parse parses an object filter expression. -func Parse(expression string) (Filter, error) { - parser := &Parser{tag: expression, length: len(expression)} - if parser.length == 0 { - return &All{}, nil +import __yyfmt__ "fmt" + +// reduceFilter reduces the given filter rules into a single filter chain (initiated with the provided operator). +// When the operator type of the second argument (Filter) is not of type filter.Any or the given operator is not +// of type filter.All, this will just create a new chain with the new op and append all the filter rules to it. +// Otherwise, it will pop the last pushed rule of that chain (second argument) and append it to the new *And chain. +// +// Example: `foo=bar|bar~foo&col!~val` +// The second argument `rule` is supposed to be a filter.ANY *Chain contains the first two conditions. +// We then call this function when the parser is processing the logical `&` op and the Unlike condition, +// and what this function will do is logically re-group the conditions into `foo=bar|(bar~foo&col!~val)`. +// +//line parser.y:3 +func reduceFilter(op string, rule Filter, rules ...Filter) Filter { + chain, ok := rule.(*Chain) + if ok && chain.op == ANY && LogicalOp(op) == ALL { + // Retrieve the last pushed condition and append it to the new "And" chain instead + andChain, _ := NewChain(ALL, chain.pop()) + andChain.add(rules...) + + chain.add(andChain) + + return chain } - return parser.readFilter(0, "", nil) -} - -// readFilter reads the entire filter from the Parser.tag and derives a filter.Filter from it. -// Returns an error on parsing failure. -func (p *Parser) readFilter(nestingLevel int, operator string, rules []Filter) (Filter, error) { - negate := false - for p.pos < p.length { - condition, err := p.readCondition() + // If the given operator is the same as the already existsing chains operator (*chain), + // we don't need to create another chain of the same operator type. Avoids something + // like &Chain{op: ALL, &Chain{op: ALL, ...}} + if chain == nil || chain.op != LogicalOp(op) { + newChain, err := NewChain(LogicalOp(op), rule) if err != nil { - return nil, err + // Just panic, filter.Parse will try to recover from this. + panic(err) } - next := p.readChar() - if condition == nil { - if next == "!" { - negate = true - continue - } - - if operator == "" && len(rules) > 0 && (next == "&" || next == "|") { - operator = next - continue - } + chain = newChain + } - if next == "" { - break - } + chain.add(rules...) - if next == ")" { - p.openParenthesis-- + return chain +} - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +//line parser.y:45 +type yySymType struct { + yys int + expr Filter + text string +} - break - } +const T_EQUAL = 57346 +const T_UNEQUAL = 57347 +const T_LIKE = 57348 +const T_UNLIKE = 57349 +const T_LESS_THAN = 57350 +const T_GREATER_THAN = 57351 +const T_LESS_THAN_OR_EQUAL = 57352 +const T_GREATER_THAN_OR_EQUAL = 57353 +const T_STRING = 57354 +const T_IDENTIFIER = 57355 +const PREFER_SHIFTING_LOGICAL_OP = 57356 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "T_EQUAL", + "\"=\"", + "T_UNEQUAL", + "\"!\"", + "T_LIKE", + "\"~\"", + "T_UNLIKE", + "T_LESS_THAN", + "\"<\"", + "T_GREATER_THAN", + "\">\"", + "T_LESS_THAN_OR_EQUAL", + "T_GREATER_THAN_OR_EQUAL", + "T_STRING", + "T_IDENTIFIER", + "\"&\"", + "\"|\"", + "PREFER_SHIFTING_LOGICAL_OP", + "\"(\"", + "\")\"", +} - return nil, p.parseError(next, "") - } +var yyStatenames = [...]string{} - if next == "(" { - if p.nextChar() == "&" || p.nextChar() == "|" { - // When a logical operator follows directly after the opening parenthesis "(", - // this can't be a valid expression. E.g. "!(&" - next = p.readChar() +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 - return nil, p.parseError(next, "") - } +//line yacctab:1 +var yyExca = [...]int8{ + -1, 1, + 1, -1, + -2, 0, +} - p.openParenthesis++ +const yyPrivate = 57344 - op := "" - if negate { - op = "!" - } +const yyLast = 34 - rule, err := p.readFilter(nestingLevel+1, op, nil) - if err != nil { - return nil, err - } +var yyAct = [...]int8{ + 17, 16, 31, 14, 23, 13, 24, 5, 25, 22, + 26, 27, 4, 29, 2, 28, 30, 8, 9, 17, + 16, 6, 18, 19, 20, 7, 32, 15, 21, 10, + 11, 12, 3, 1, +} - rules = append(rules, rule) - negate = false - continue - } +var yyPact = [...]int16{ + 14, -1000, -2, -2, -2, -17, -1000, 14, -1000, -1000, + 14, 14, -1000, 14, 0, -1000, -1000, -1000, -1000, -1000, + -1000, -21, 2, -1000, -1000, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, +} - if next == operator { - continue - } +var yyPgo = [...]int8{ + 0, 33, 14, 32, 12, 31, 27, 9, 7, 3, + 25, +} - // When the current operator is a "!", the next one can't be a logical operator. - if operator != "!" && (next == "&" || next == "|") { - if operator == "&" { - if len(rules) > 1 { - rules = []Filter{&All{rules: rules}} - } +var yyR1 = [...]int8{ + 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, + 5, 6, 9, 9, 8, 8, 10, 10, 7, 7, + 7, 7, 7, 7, 7, 7, +} - operator = next - } else if operator == "|" || (operator == "!" && next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +var yyR2 = [...]int8{ + 0, 3, 1, 3, 1, 3, 1, 2, 3, 3, + 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +var yyChk = [...]int16{ + -1000, -1, -2, -3, -4, -8, 7, -10, 19, 20, + -10, -10, -5, 22, -9, -6, 18, 17, -2, -4, + -4, -2, -7, 4, 6, 8, 10, 11, 15, 13, + 16, 23, -9, +} - rules = append(rules, rule) - } +var yyDef = [...]int8{ + 14, -2, 2, 4, 6, 0, 15, 14, 16, 17, + 14, 14, 7, 14, 11, 10, 12, 13, 1, 3, + 5, 0, 0, 18, 19, 20, 21, 22, 23, 24, + 25, 8, 9, +} - continue - } +var yyTok1 = [...]int8{ + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 7, 3, 3, 3, 3, 19, 3, + 22, 23, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 12, 5, 14, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 20, 3, 9, +} - return nil, p.parseError(next, fmt.Sprintf("operator level %d", nestingLevel)) - } else { - if negate { - negate = false - rules = append(rules, &None{rules: []Filter{condition}}) - } else { - rules = append(rules, condition) - } +var yyTok2 = [...]int8{ + 2, 3, 4, 6, 8, 10, 11, 13, 15, 16, + 17, 18, 21, +} - if next == "" { - break - } +var yyTok3 = [...]int8{ + 0, +} - if next == ")" { - p.openParenthesis-- +var yyErrorMessages = [...]struct { + state int + token int + msg string +}{} - if nestingLevel > 0 { - next = p.nextChar() - if next != "" && next != "&" && next != "|" && next != ")" { - p.pos++ - return nil, p.parseError(next, "Expected logical operator") - } +//line yaccpar:1 - break - } +/* parser for yacc output */ - return nil, p.parseError(next, "") - } +var ( + yyDebug = 0 + yyErrorVerbose = false +) - if next == operator { - continue - } +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} - if next == "&" || next == "|" { - if operator == "" || operator == "&" { - if operator == "&" && len(rules) > 1 { - all := &All{rules: rules} - rules = []Filter{all} - } +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} - operator = next - } else if operator == "" || (operator == "!" && next == "&") { - // The last pushed filter chain - lastRule := rules[len(rules)-1] - // Erase it from our Rules slice - rules = rules[:len(rules)-1] +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} - rule, err := p.readFilter(nestingLevel+1, next, []Filter{lastRule}) - if err != nil { - return nil, err - } +func (p *yyParserImpl) Lookahead() int { + return p.char +} - rules = append(rules, rule) - } +func yyNewParser() yyParser { + return &yyParserImpl{} +} - continue - } +const yyFlag = -1000 - return nil, p.parseError(next, "") +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] } } + return __yyfmt__.Sprintf("tok-%v", c) +} - if nestingLevel == 0 && p.pos < p.length { - return nil, p.parseError(operator, "Did not read full filter") +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } } + return __yyfmt__.Sprintf("state-%v", s) +} - if nestingLevel == 0 && p.openParenthesis > 0 { - return nil, fmt.Errorf("invalid filter '%s', missing %d closing ')' at pos %d", p.tag, p.openParenthesis, p.pos) - } +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 - if nestingLevel == 0 && p.openParenthesis < 0 { - return nil, fmt.Errorf("invalid filter '%s', unexpected closing ')' at pos %d", p.tag, p.pos) + if !yyErrorVerbose { + return "syntax error" } - var chain Filter - switch operator { - case "&": - chain = &All{rules: rules} - case "|": - chain = &Any{rules: rules} - case "!": - chain = &None{rules: rules} - case "": - if nestingLevel == 0 && rules != nil { - // There is only one filter tag, no chain - return rules[0], nil + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg } - - chain = &All{rules: rules} - default: - return nil, p.parseError(operator, "") } - return chain, nil -} + res := "syntax error: unexpected " + yyTokname(lookAhead) -// readCondition reads the next filter.Filter. -// returns nil if there is no char to read and an error on parsing failure. -func (p *Parser) readCondition() (Filter, error) { - column, err := p.readColumn() - if err != nil || column == "" { - return nil, err - } + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) - operator := "" - if strings.Contains("=>= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } } - if operator == "" { - return NewExists(column), nil - } + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || int(yyExca[i+1]) != state { + i += 2 + } - if strings.Contains(">= 0; i += 2 { + tok := int(yyExca[i]) + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) } - } - value, err := p.readValue() - if err != nil { - return nil, err + // If the default action is to accept or reduce, give up. + if yyExca[i+1] != 0 { + return res + } } - condition, err := p.createCondition(column, operator, value) - if err != nil { - return nil, err + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) } - - return condition, nil + return res } -// createCondition creates a filter.Filter based on the given operator. -// returns nil when invalid operator is given. -func (p *Parser) createCondition(column string, operator string, value string) (Filter, error) { - column = strings.TrimSpace(column) - switch operator { - case "=": - if strings.Contains(value, "*") { - return &Like{column: column, value: value}, nil +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = int(yyTok1[0]) + goto out + } + if char < len(yyTok1) { + token = int(yyTok1[char]) + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = int(yyTok2[char-yyPrivate]) + goto out } - - return &Equal{column: column, value: value}, nil - case "!=": - if strings.Contains(value, "*") { - return &Unlike{column: column, value: value}, nil + } + for i := 0; i < len(yyTok3); i += 2 { + token = int(yyTok3[i+0]) + if token == char { + token = int(yyTok3[i+1]) + goto out } + } - return &UnEqual{column: column, value: value}, nil - case ">": - return &GreaterThan{column: column, value: value}, nil - case ">=": - return &GreaterThanOrEqual{column: column, value: value}, nil - case "<": - return &LessThan{column: column, value: value}, nil - case "<=": - return &LessThanOrEqual{column: column, value: value}, nil - default: - return nil, fmt.Errorf("invalid operator %s provided", operator) +out: + if token == 0 { + token = int(yyTok2[1]) /* unknown char */ } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) + } + return char, token } -// readColumn reads a column name from the Parser.tag. -// returns empty string if there is no char to read. -func (p *Parser) readColumn() (string, error) { - return url.QueryUnescape(p.readUntil("=()&|><") - if value == "" { - return "", nil +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. + yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) } - return url.QueryUnescape(value) -} + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate -// readUntil reads chars until any of the given characters -// May return empty string if there is no char to read -func (p *Parser) readUntil(chars string) string { - var buffer string - for char := p.readChar(); char != ""; char = p.readChar() { - if strings.Contains(chars, char) { - p.pos-- - break +yynewstate: + yyn = int(yyPact[yystate]) + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = int(yyAct[yyn]) + if int(yyChk[yyn]) == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- } - - buffer += char + goto yystack } - return buffer -} - -// readChar peeks the next char of the Parser.tag and increments the Parser.pos by one -// returns empty if there is no char to read -func (p *Parser) readChar() string { - if p.pos < p.length { - pos := p.pos - p.pos++ +yydefault: + /* default state action */ + yyn = int(yyDef[yystate]) + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } - return string(p.tag[pos]) + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = int(yyExca[xi+0]) + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = int(yyExca[xi+1]) + if yyn < 0 { + goto ret0 + } } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = int(yyAct[yyn]) /* simulate a shift of "error" */ + if int(yyChk[yystate]) == yyErrCode { + goto yystack + } + } - return "" -} + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... abort */ + goto ret1 -// nextChar peeks the next char from the parser tag -// returns empty string if there is no char to read -func (p *Parser) nextChar() string { - if p.pos < p.length { - return string(p.tag[p.pos]) + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } } - return "" -} + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } -// parseError returns a formatted and detailed parser error. -// If you don't provide the char that causes the parser to fail, the char at `p.pos` is automatically used. -// By specifying the `msg` arg you can provide additional err hints that can help debugging. -func (p *Parser) parseError(invalidChar string, msg string) error { - if invalidChar == "" { - pos := p.pos - if p.pos == p.length { - pos-- + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= int(yyR2[yyn]) + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. + if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = int(yyR1[yyn]) + yyg := int(yyPgo[yyn]) + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = int(yyAct[yyg]) + } else { + yystate = int(yyAct[yyj]) + if int(yyChk[yystate]) != -yyn { + yystate = int(yyAct[yyg]) } - - invalidChar = string(p.tag[pos]) } + // dummy call; replaced with literal code + switch yynt { + + case 1: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:98 + { + yyVAL.expr = reduceFilter(yyDollar[2].text, yyDollar[1].expr, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 3: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:106 + { + yyVAL.expr = reduceFilter(yyDollar[2].text, yyDollar[1].expr, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 5: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:114 + { + yyVAL.expr = reduceFilter(yyDollar[2].text, yyDollar[1].expr, yyDollar[3].expr) + yylex.(*Lexer).rule = yyVAL.expr + } + case 7: + yyDollar = yyS[yypt-2 : yypt+1] +//line parser.y:122 + { + if yyDollar[1].text != "" { + // NewChain is only going to return an error if an invalid operator is specified, and since + // we explicitly provide the NONE operator, we don't expect an error to be returned. + yyVAL.expr, _ = NewChain(NONE, yyDollar[2].expr) + } else { + yyVAL.expr = yyDollar[2].expr + } - if msg != "" { - msg = ": " + msg - } + yylex.(*Lexer).rule = yyVAL.expr + } + case 8: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:135 + { + yyVAL.expr = yyDollar[2].expr + yylex.(*Lexer).rule = yyVAL.expr + } + case 9: + yyDollar = yyS[yypt-3 : yypt+1] +//line parser.y:140 + { + cond, err := NewCondition(yyDollar[1].text, yyDollar[2].text, yyDollar[3].text) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } + + yyVAL.expr = cond + yylex.(*Lexer).rule = yyVAL.expr + } + case 11: + yyDollar = yyS[yypt-1 : yypt+1] +//line parser.y:154 + { + exists, err := NewExists(yyDollar[1].text) + if err != nil { + // Something went wrong, so just panic and filter.Parse will try to recover from this. + panic(err) + } - return fmt.Errorf("invalid filter '%s', unexpected %s at pos %d%s", p.tag, invalidChar, p.pos, msg) + yyVAL.expr = exists + yylex.(*Lexer).rule = yyVAL.expr + } + case 14: + yyDollar = yyS[yypt-0 : yypt+1] +//line parser.y:170 + { + yyVAL.text = "" + } + } + goto yystack /* stack new state and value */ }