From 200e590563134f2a22498cd5cce5e5e903c07e3d Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Wed, 17 Jan 2024 15:23:43 +0100 Subject: [PATCH] eskip: improve lexer performance 2 (#2870) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * eskip: add BenchmarkParse Add a benchmark for parsing 10000 routes. Signed-off-by: Alexander Yastrebov * eskip: improve lexer performance 2 * use plain ascii instead of unicode package * use loop for scanSymbol * call scan functions directly instead of selectScanner to aid inlining ``` goos: linux goarch: amd64 pkg: github.com/zalando/skipper/eskip │ HEAD~1 │ HEAD │ │ sec/op │ sec/op vs base │ ParsePredicates-8 9.637µ ± 11% 8.894µ ± 4% -7.71% (p=0.001 n=10) Parse-8 329.1m ± 4% 272.7m ± 2% -17.15% (p=0.000 n=10) geomean 1.781m 1.557m -12.56% │ HEAD~1 │ HEAD │ │ B/op │ B/op vs base │ ParsePredicates-8 2.008Ki ± 0% 2.008Ki ± 0% ~ (p=1.000 n=10) Parse-8 49.94Mi ± 0% 49.94Mi ± 0% ~ (p=0.926 n=10) geomean 320.4Ki 320.4Ki -0.00% │ HEAD~1 │ HEAD │ │ allocs/op │ allocs/op vs base │ ParsePredicates-8 33.00 ± 0% 33.00 ± 0% ~ (p=1.000 n=10) ¹ Parse-8 1.100M ± 0% 1.100M ± 0% ~ (p=0.367 n=10) geomean 6.025k 6.025k +0.00% ¹ all samples are equal ``` See previous #2755 Signed-off-by: Alexander Yastrebov --------- Signed-off-by: Alexander Yastrebov --- .gitignore | 2 ++ eskip/eskip_test.go | 34 ++++++++++++++++++++ eskip/lexer.go | 77 +++++++++++++++++++++------------------------ 3 files changed, 72 insertions(+), 41 deletions(-) diff --git a/.gitignore b/.gitignore index b43acd7b92..518b18fbab 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ opentracingplugin/build build/ skptesting/lorem.html .vscode/* +*.test + diff --git a/eskip/eskip_test.go b/eskip/eskip_test.go index efa50f7444..a12e21b127 100644 --- a/eskip/eskip_test.go +++ b/eskip/eskip_test.go @@ -3,6 +3,7 @@ package eskip import ( "reflect" "regexp" + "strings" "testing" "github.com/google/go-cmp/cmp" @@ -849,6 +850,39 @@ func BenchmarkParsePredicates(b *testing.B) { } } +func BenchmarkParse(b *testing.B) { + doc := strings.Repeat(`xxxx_xx__xxxxx__xxx_xxxxxxxx_xxxxxxxxxx_xxxxxxx_xxxxxxx_xxxxxxx_xxxxx__xxx__40_0: + Path("/xxxxxxxxx/:xxxxxxxx_xx/xxxxxxxx-xxxxxxxxxx-xxxxxxxxx") + && Host("^(xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-18[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-19[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-20[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-21[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?|xxxxxxxxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx[.]xxxxxxxxxxx[.]xxx[.]?(:[0-9]+)?)$") + && Host("^(xxx-xxxxxxxx-xxxxxxxxxx-xxxxxxx-xxxxxxx-xxxx-21[.]xxx-xxxx[.]xxxxx[.]xx[.]?(:[0-9]+)?)$") + && Weight(4) + && Method("GET") + && JWTPayloadAllKV("xxxxx://xxxxxxxx.xxxxxxx.xxx/xxxxx", "xxxxx") + && Header("X-Xxxxxxxxx-Xxxxx", "xxxxx") + -> disableAccessLog(2, 3, 40, 500) + -> fifo(1000, 100, "10s") + -> apiUsageMonitoring("{\"xxx_xx\":\"xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx\",\"xxxxxxxxxxx_xx\":\"xxx-xxxxxxxx-xxxxxxxxxx\",\"xxxx_xxxxxxxxx\":[\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx\",\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx-xxxxxxx\",\"/xxxxxxxxx/{xxxxxxxx_xx}/xxxxxxxx-xxxxxxxxxx-xxxxxxxxx\"]}") + -> oauthTokeninfoAnyKV("xxxxx", "/xxxxxxxxx") + -> unverifiedAuditLog("xxxxx://xxxxxxxx.xxxxxxx.xxx/xxxxxxx-xx") + -> oauthTokeninfoAllScope("xxx") + -> flowId("reuse") + -> forwardToken("X-XxxxxXxxx-Xxxxxxx", "xxx", "xxxxx", "xxxxx") + -> stateBagToTag("xxxx-xxxx", "xxxxxx.xxx") + -> ; + `, 10_000) + + _, err := Parse(doc) + if err != nil { + b.Fatal(err) + } + + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = Parse(doc) + } +} + var stringSink string func BenchmarkRouteString(b *testing.B) { diff --git a/eskip/lexer.go b/eskip/lexer.go index e7438fca67..4100870073 100644 --- a/eskip/lexer.go +++ b/eskip/lexer.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "strings" - "unicode" ) type token struct { @@ -14,14 +13,6 @@ type token struct { type charPredicate func(byte) bool -type scanner interface { - scan(string) (token, string, error) -} - -type scannerFunc func(string) (token, string, error) - -func (sf scannerFunc) scan(code string) (token, string, error) { return sf(code) } - type eskipLex struct { code string lastToken string @@ -78,11 +69,11 @@ func (l *eskipLex) init(code string) { func isNewline(c byte) bool { return c == newlineChar } func isUnderscore(c byte) bool { return c == underscore } -func isAlpha(c byte) bool { return unicode.IsLetter(rune(c)) } -func isDigit(c byte) bool { return unicode.IsDigit(rune(c)) } -func isSymbolChar(c byte) bool { return isUnderscore(c) || isAlpha(c) || isDigit(c) } +func isAlpha(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') } +func isDigit(c byte) bool { return c >= '0' && c <= '9' } +func isSymbolChar(c byte) bool { return isAlpha(c) || isDigit(c) || isUnderscore(c) } func isDecimalChar(c byte) bool { return c == decimalChar } -func isNumberChar(c byte) bool { return isDecimalChar(c) || isDigit(c) } +func isNumberChar(c byte) bool { return isDigit(c) || isDecimalChar(c) } func scanWhile(code string, p charPredicate) (string, string) { for i := 0; i < len(code); i++ { @@ -277,74 +268,78 @@ func scanNumber(code string) (t token, rest string, err error) { func scanSymbol(code string) (t token, rest string, err error) { t.id = symbol - t.val, rest = scanWhile(code, isSymbolChar) + for i := 0; i < len(code); i++ { + if !isSymbolChar(code[i]) { + t.val, rest = code[0:i], code[i:] + return + } + } + t.val, rest = code, "" return } -func selectScanner(code string) scanner { +func scan(code string) (token, string, error) { switch code[0] { case ',': - return commaToken + return commaToken.scan(code) case ')': - return closeparenToken + return closeparenToken.scan(code) case '(': - return openparenToken + return openparenToken.scan(code) case ':': - return colonToken + return colonToken.scan(code) case ';': - return semicolonToken + return semicolonToken.scan(code) case '>': - return closearrowToken + return closearrowToken.scan(code) case '*': - return anyToken + return anyToken.scan(code) case '&': if len(code) >= 2 && code[1] == '&' { - return andToken + return andToken.scan(code) } case '-': if len(code) >= 2 && code[1] == '>' { - return arrowToken + return arrowToken.scan(code) } case '/': - return scannerFunc(scanRegexpOrComment) + return scanRegexpOrComment(code) case '"': - return scannerFunc(scanDoubleQuote) + return scanDoubleQuote(code) case '`': - return scannerFunc(scanBacktick) + return scanBacktick(code) case '<': for _, tok := range openarrowPrefixedTokens { if strings.HasPrefix(code, tok.val) { - return tok + return tok.scan(code) } } - return openarrowToken + return openarrowToken.scan(code) } if isNumberChar(code[0]) { - return scannerFunc(scanNumber) + return scanNumber(code) } if isAlpha(code[0]) || isUnderscore(code[0]) { - return scannerFunc(scanSymbol) + return scanSymbol(code) } - return nil + return token{}, "", unexpectedToken } -func (l *eskipLex) next() (t token, err error) { +func (l *eskipLex) next() (token, error) { l.code = scanWhitespace(l.code) if len(l.code) == 0 { - err = eof - return + return token{}, eof } - s := selectScanner(l.code) - if s == nil { - err = unexpectedToken - return + t, rest, err := scan(l.code) + if err == unexpectedToken { + return token{}, err } + l.code = rest - t, l.code, err = s.scan(l.code) if err == void { return l.next() } @@ -353,7 +348,7 @@ func (l *eskipLex) next() (t token, err error) { l.lastToken = t.val } - return + return t, err } func (l *eskipLex) Lex(lval *eskipSymType) int {