From afa974fb057e23af552b48f4f7c947141c486e96 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 12:52:55 +0300 Subject: [PATCH 01/18] base case make benchmark more extensive add quote to string add BenchmarkSanitizeSQL --- internal/sanitize/sanitize_bench_test.go | 62 ++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 internal/sanitize/sanitize_bench_test.go diff --git a/internal/sanitize/sanitize_bench_test.go b/internal/sanitize/sanitize_bench_test.go new file mode 100644 index 000000000..baa742b11 --- /dev/null +++ b/internal/sanitize/sanitize_bench_test.go @@ -0,0 +1,62 @@ +// sanitize_benchmark_test.go +package sanitize_test + +import ( + "testing" + "time" + + "github.com/jackc/pgx/v5/internal/sanitize" +) + +var benchmarkSanitizeResult string + +const benchmarkQuery = "" + + `SELECT * + FROM "water_containers" + WHERE NOT "id" = $1 -- int64 + AND "tags" NOT IN $2 -- nil + AND "volume" > $3 -- float64 + AND "transportable" = $4 -- bool + AND position($5 IN "sign") -- bytes + AND "label" LIKE $6 -- string + AND "created_at" > $7; -- time.Time` + +var benchmarkArgs = []any{ + int64(12345), + nil, + float64(500), + true, + []byte("8BADF00D"), + "kombucha's han'dy awokowa", + time.Date(2015, 10, 1, 0, 0, 0, 0, time.UTC), +} + +func BenchmarkSanitize(b *testing.B) { + query, err := sanitize.NewQuery(benchmarkQuery) + if err != nil { + b.Fatalf("failed to create query: %v", err) + } + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + benchmarkSanitizeResult, err = query.Sanitize(benchmarkArgs...) + if err != nil { + b.Fatalf("failed to sanitize query: %v", err) + } + } +} + +var benchmarkNewSQLResult string + +func BenchmarkSanitizeSQL(b *testing.B) { + b.ReportAllocs() + var err error + for i := 0; i < b.N; i++ { + benchmarkNewSQLResult, err = sanitize.SanitizeSQL(benchmarkQuery, benchmarkArgs...) + if err != nil { + b.Fatalf("failed to sanitize SQL: %v", err) + } + } +} From aabed18db8d4bc55f5d2e18a23a7707156e456ef Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 12:57:07 +0300 Subject: [PATCH 02/18] add benchmark tool fix benchmmark script fix benchmark script --- internal/sanitize/benchmmark.sh | 59 +++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 internal/sanitize/benchmmark.sh diff --git a/internal/sanitize/benchmmark.sh b/internal/sanitize/benchmmark.sh new file mode 100644 index 000000000..87e7e0a11 --- /dev/null +++ b/internal/sanitize/benchmmark.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +current_branch=$(git rev-parse --abbrev-ref HEAD) +if [ "$current_branch" == "HEAD" ]; then + current_branch=$(git rev-parse HEAD) +fi + +restore_branch() { + echo "Restoring original branch/commit: $current_branch" + git checkout "$current_branch" +} +trap restore_branch EXIT + +# Check if there are uncommitted changes +if ! git diff --quiet || ! git diff --cached --quiet; then + echo "There are uncommitted changes. Please commit or stash them before running this script." + exit 1 +fi + +# Ensure that at least one commit argument is passed +if [ "$#" -lt 1 ]; then + echo "Usage: $0 ... " + exit 1 +fi + +commits=("$@") +benchmarks_dir=benchmarks + +if ! mkdir -p "${benchmarks_dir}"; then + echo "Unable to create dir for benchmarks data" + exit 1 +fi + +# Benchmark results +bench_files=() + +# Run benchmark for each listed commit +for i in "${!commits[@]}"; do + commit="${commits[i]}" + git checkout "$commit" || { + echo "Failed to checkout $commit" + exit 1 + } + + # Sanitized commmit message + commit_message=$(git log -1 --pretty=format:"%s" | tr ' ' '_') + + # Benchmark data will go there + bench_file="${benchmarks_dir}/${i}_${commit_message}.bench" + + if ! go test -bench=. -count=25 >"$bench_file"; then + echo "Benchmarking failed for commit $commit" + exit 1 + fi + + bench_files+=("$bench_file") +done + +benchstat "${bench_files[@]}" From efc2c9ff4438a84402c5631b9de961b9f62440db Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 12:53:07 +0300 Subject: [PATCH 03/18] buf pool --- internal/sanitize/sanitize.go | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index df58c4484..4a069658b 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -6,6 +6,7 @@ import ( "fmt" "strconv" "strings" + "sync" "time" "unicode/utf8" ) @@ -24,9 +25,26 @@ type Query struct { // https://github.com/jackc/pgx/issues/1380 const replacementcharacterwidth = 3 +var bufPool = &sync.Pool{} + +func getBuf() *bytes.Buffer { + buf, _ := bufPool.Get().(*bytes.Buffer) + if buf == nil { + buf = &bytes.Buffer{} + } + + return buf +} + +func putBuf(buf *bytes.Buffer) { + buf.Reset() + bufPool.Put(buf) +} + func (q *Query) Sanitize(args ...any) (string, error) { argUse := make([]bool, len(args)) - buf := &bytes.Buffer{} + buf := getBuf() + defer putBuf(buf) for _, part := range q.Parts { var str string From 546ad2f4e23675f7398b5136b43f4ca2803d046d Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 13:24:03 +0300 Subject: [PATCH 04/18] shared bytestring --- internal/sanitize/sanitize.go | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index 4a069658b..c7c8acd59 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -41,16 +41,19 @@ func putBuf(buf *bytes.Buffer) { bufPool.Put(buf) } +var null = []byte("null") + func (q *Query) Sanitize(args ...any) (string, error) { argUse := make([]bool, len(args)) buf := getBuf() defer putBuf(buf) + var p []byte for _, part := range q.Parts { - var str string + p = p[:0] switch part := part.(type) { case string: - str = part + buf.WriteString(part) case int: argIdx := part - 1 @@ -64,19 +67,19 @@ func (q *Query) Sanitize(args ...any) (string, error) { arg := args[argIdx] switch arg := arg.(type) { case nil: - str = "null" + p = null case int64: - str = strconv.FormatInt(arg, 10) + p = strconv.AppendInt(p, arg, 10) case float64: - str = strconv.FormatFloat(arg, 'f', -1, 64) + p = strconv.AppendFloat(p, arg, 'f', -1, 64) case bool: - str = strconv.FormatBool(arg) + p = strconv.AppendBool(p, arg) case []byte: - str = QuoteBytes(arg) + p = []byte(QuoteBytes(arg)) case string: - str = QuoteString(arg) + p = []byte(QuoteString(arg)) case time.Time: - str = arg.Truncate(time.Microsecond).Format("'2006-01-02 15:04:05.999999999Z07:00:00'") + p = arg.Truncate(time.Microsecond).AppendFormat(p, "'2006-01-02 15:04:05.999999999Z07:00:00'") default: return "", fmt.Errorf("invalid arg type: %T", arg) } @@ -84,11 +87,12 @@ func (q *Query) Sanitize(args ...any) (string, error) { // Prevent SQL injection via Line Comment Creation // https://github.com/jackc/pgx/security/advisories/GHSA-m7wr-2xf7-cm9p - str = " " + str + " " + buf.WriteByte(' ') + buf.Write(p) + buf.WriteByte(' ') default: return "", fmt.Errorf("invalid Part type: %T", part) } - buf.WriteString(str) } for i, used := range argUse { From ee718a110d4b0b5eda4b887574e700b33d8b8982 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 13:30:34 +0300 Subject: [PATCH 05/18] append AvailableBuffer --- internal/sanitize/sanitize.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index c7c8acd59..1e0b20aca 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -47,16 +47,14 @@ func (q *Query) Sanitize(args ...any) (string, error) { argUse := make([]bool, len(args)) buf := getBuf() defer putBuf(buf) - var p []byte for _, part := range q.Parts { - p = p[:0] switch part := part.(type) { case string: buf.WriteString(part) case int: argIdx := part - 1 - + var p []byte if argIdx < 0 { return "", fmt.Errorf("first sql argument must be > 0") } @@ -64,22 +62,23 @@ func (q *Query) Sanitize(args ...any) (string, error) { if argIdx >= len(args) { return "", fmt.Errorf("insufficient arguments") } + buf.WriteByte(' ') arg := args[argIdx] switch arg := arg.(type) { case nil: p = null case int64: - p = strconv.AppendInt(p, arg, 10) + p = strconv.AppendInt(buf.AvailableBuffer(), arg, 10) case float64: - p = strconv.AppendFloat(p, arg, 'f', -1, 64) + p = strconv.AppendFloat(buf.AvailableBuffer(), arg, 'f', -1, 64) case bool: - p = strconv.AppendBool(p, arg) + p = strconv.AppendBool(buf.AvailableBuffer(), arg) case []byte: p = []byte(QuoteBytes(arg)) case string: p = []byte(QuoteString(arg)) case time.Time: - p = arg.Truncate(time.Microsecond).AppendFormat(p, "'2006-01-02 15:04:05.999999999Z07:00:00'") + p = arg.Truncate(time.Microsecond).AppendFormat(buf.AvailableBuffer(), "'2006-01-02 15:04:05.999999999Z07:00:00'") default: return "", fmt.Errorf("invalid arg type: %T", arg) } @@ -87,7 +86,6 @@ func (q *Query) Sanitize(args ...any) (string, error) { // Prevent SQL injection via Line Comment Creation // https://github.com/jackc/pgx/security/advisories/GHSA-m7wr-2xf7-cm9p - buf.WriteByte(' ') buf.Write(p) buf.WriteByte(' ') default: From 1752f7b4c1ba7ba0d63cf4cc2f68c3ed56337716 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 13:47:44 +0300 Subject: [PATCH 06/18] docs --- internal/sanitize/sanitize.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index 1e0b20aca..3414d6d1a 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -62,7 +62,11 @@ func (q *Query) Sanitize(args ...any) (string, error) { if argIdx >= len(args) { return "", fmt.Errorf("insufficient arguments") } + + // Prevent SQL injection via Line Comment Creation + // https://github.com/jackc/pgx/security/advisories/GHSA-m7wr-2xf7-cm9p buf.WriteByte(' ') + arg := args[argIdx] switch arg := arg.(type) { case nil: @@ -78,15 +82,17 @@ func (q *Query) Sanitize(args ...any) (string, error) { case string: p = []byte(QuoteString(arg)) case time.Time: - p = arg.Truncate(time.Microsecond).AppendFormat(buf.AvailableBuffer(), "'2006-01-02 15:04:05.999999999Z07:00:00'") + p = arg.Truncate(time.Microsecond). + AppendFormat(buf.AvailableBuffer(), "'2006-01-02 15:04:05.999999999Z07:00:00'") default: return "", fmt.Errorf("invalid arg type: %T", arg) } argUse[argIdx] = true + buf.Write(p) + // Prevent SQL injection via Line Comment Creation // https://github.com/jackc/pgx/security/advisories/GHSA-m7wr-2xf7-cm9p - buf.Write(p) buf.WriteByte(' ') default: return "", fmt.Errorf("invalid Part type: %T", part) From 58d4c0c94fa86d28f5931af2811ce9acdcb117da Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 14:30:05 +0300 Subject: [PATCH 07/18] quoteBytes check new quoteBytes --- internal/sanitize/sanitize.go | 17 +++++++++++++++-- internal/sanitize/sanitize_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index 3414d6d1a..91d6db58c 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/hex" "fmt" + "slices" "strconv" "strings" "sync" @@ -78,7 +79,7 @@ func (q *Query) Sanitize(args ...any) (string, error) { case bool: p = strconv.AppendBool(buf.AvailableBuffer(), arg) case []byte: - p = []byte(QuoteBytes(arg)) + p = quoteBytes(buf.AvailableBuffer(), arg) case string: p = []byte(QuoteString(arg)) case time.Time: @@ -127,7 +128,19 @@ func QuoteString(str string) string { } func QuoteBytes(buf []byte) string { - return `'\x` + hex.EncodeToString(buf) + "'" + return string(quoteBytes(nil, buf)) +} + +func quoteBytes(dst, buf []byte) []byte { + dst = append(dst, `'\x`...) + + n := hex.EncodedLen(len(buf)) + p := slices.Grow(dst[len(dst):], n)[:n] + hex.Encode(p, buf) + dst = append(dst, p...) + + dst = append(dst, `'`...) + return dst } type sqlLexer struct { diff --git a/internal/sanitize/sanitize_test.go b/internal/sanitize/sanitize_test.go index 1deff3fba..76ae7a47f 100644 --- a/internal/sanitize/sanitize_test.go +++ b/internal/sanitize/sanitize_test.go @@ -1,6 +1,7 @@ package sanitize_test import ( + "encoding/hex" "testing" "time" @@ -227,3 +228,27 @@ func TestQuerySanitize(t *testing.T) { } } } + +func TestQuoteBytes(t *testing.T) { + tc := func(name string, input []byte) { + t.Run(name, func(t *testing.T) { + t.Parallel() + + got := sanitize.QuoteBytes(input) + want := oldQuoteBytes(input) + + if got != want { + t.Errorf("got: %s", got) + t.Fatalf("want: %s", want) + } + }) + } + + tc("nil", nil) + tc("empty", []byte{}) + tc("text", []byte("abcd")) +} + +func oldQuoteBytes(buf []byte) string { + return `'\x` + hex.EncodeToString(buf) + "'" +} From ea1e13a660aa60e9e34f67277ccdd77c9a74e535 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 14:50:59 +0300 Subject: [PATCH 08/18] quoteString --- internal/sanitize/sanitize.go | 31 ++++++++++++++++++++++++++++-- internal/sanitize/sanitize_test.go | 25 ++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index 91d6db58c..d83633a72 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -81,7 +81,7 @@ func (q *Query) Sanitize(args ...any) (string, error) { case []byte: p = quoteBytes(buf.AvailableBuffer(), arg) case string: - p = []byte(QuoteString(arg)) + p = quoteString(buf.AvailableBuffer(), arg) case time.Time: p = arg.Truncate(time.Microsecond). AppendFormat(buf.AvailableBuffer(), "'2006-01-02 15:04:05.999999999Z07:00:00'") @@ -124,7 +124,34 @@ func NewQuery(sql string) (*Query, error) { } func QuoteString(str string) string { - return "'" + strings.ReplaceAll(str, "'", "''") + "'" + return string(quoteString(nil, str)) +} + +func quoteString(dst []byte, str string) []byte { + const quote = "'" + + n := strings.Count(str, quote) + + dst = append(dst, quote...) + + p := slices.Grow(dst[len(dst):], len(str)+2*n) + + for len(str) > 0 { + i := strings.Index(str, quote) + if i < 0 { + p = append(p, str...) + break + } + p = append(p, str[:i]...) + p = append(p, "''"...) + str = str[i+1:] + } + + dst = append(dst, p...) + + dst = append(dst, quote...) + + return dst } func QuoteBytes(buf []byte) string { diff --git a/internal/sanitize/sanitize_test.go b/internal/sanitize/sanitize_test.go index 76ae7a47f..aafcd682d 100644 --- a/internal/sanitize/sanitize_test.go +++ b/internal/sanitize/sanitize_test.go @@ -2,6 +2,7 @@ package sanitize_test import ( "encoding/hex" + "strings" "testing" "time" @@ -229,6 +230,30 @@ func TestQuerySanitize(t *testing.T) { } } +func TestQuoteString(t *testing.T) { + tc := func(name, input string) { + t.Run(name, func(t *testing.T) { + t.Parallel() + + got := sanitize.QuoteString(input) + want := oldQuoteString(input) + + if got != want { + t.Errorf("got: %s", got) + t.Fatalf("want: %s", want) + } + }) + } + + tc("empty", "") + tc("text", "abcd") + tc("with quotes", `one's hat is always a cat`) +} + +func oldQuoteString(str string) string { + return "'" + strings.ReplaceAll(str, "'", "''") + "'" +} + func TestQuoteBytes(t *testing.T) { tc := func(name string, input []byte) { t.Run(name, func(t *testing.T) { From 4293b2526266935b0ea2f86515217d277c1e4d2a Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 15:25:24 +0300 Subject: [PATCH 09/18] decrease number of samples in go benchmark --- internal/sanitize/benchmmark.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/sanitize/benchmmark.sh b/internal/sanitize/benchmmark.sh index 87e7e0a11..06842c0aa 100644 --- a/internal/sanitize/benchmmark.sh +++ b/internal/sanitize/benchmmark.sh @@ -48,7 +48,7 @@ for i in "${!commits[@]}"; do # Benchmark data will go there bench_file="${benchmarks_dir}/${i}_${commit_message}.bench" - if ! go test -bench=. -count=25 >"$bench_file"; then + if ! go test -bench=. -count=10 >"$bench_file"; then echo "Benchmarking failed for commit $commit" exit 1 fi From c4c1076d28cc0333616fc55b1ecea7ebf0087cc8 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 16:37:04 +0300 Subject: [PATCH 10/18] add FuzzQuoteString and FuzzQuoteBytes --- internal/sanitize/sanitize_fuzz_test.go | 43 +++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 internal/sanitize/sanitize_fuzz_test.go diff --git a/internal/sanitize/sanitize_fuzz_test.go b/internal/sanitize/sanitize_fuzz_test.go new file mode 100644 index 000000000..7d594def0 --- /dev/null +++ b/internal/sanitize/sanitize_fuzz_test.go @@ -0,0 +1,43 @@ +package sanitize_test + +import ( + "testing" + + "github.com/jackc/pgx/v5/internal/sanitize" +) + +func FuzzQuoteString(f *testing.F) { + f.Add("") + f.Add("\n") + f.Add("sample text") + f.Add("sample q'u'o't'e's") + f.Add("select 'quoted $42', $1") + + f.Fuzz(func(t *testing.T, input string) { + got := sanitize.QuoteString(input) + want := oldQuoteString(input) + + if want != got { + t.Errorf("got %q", got) + t.Fatalf("want %q", want) + } + }) +} + +func FuzzQuoteBytes(f *testing.F) { + f.Add([]byte(nil)) + f.Add([]byte("\n")) + f.Add([]byte("sample text")) + f.Add([]byte("sample q'u'o't'e's")) + f.Add([]byte("select 'quoted $42', $1")) + + f.Fuzz(func(t *testing.T, input []byte) { + got := sanitize.QuoteBytes(input) + want := oldQuoteBytes(input) + + if want != got { + t.Errorf("got %q", got) + t.Fatalf("want %q", want) + } + }) +} From 39ffc8b7a4df735206d45fcefb6c21f126c5d6e3 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 16:42:27 +0300 Subject: [PATCH 11/18] add lexer and query pools use lexer pool --- internal/sanitize/sanitize.go | 93 +++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index d83633a72..4aca2fb98 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -26,28 +26,19 @@ type Query struct { // https://github.com/jackc/pgx/issues/1380 const replacementcharacterwidth = 3 -var bufPool = &sync.Pool{} - -func getBuf() *bytes.Buffer { - buf, _ := bufPool.Get().(*bytes.Buffer) - if buf == nil { - buf = &bytes.Buffer{} - } - - return buf -} - -func putBuf(buf *bytes.Buffer) { - buf.Reset() - bufPool.Put(buf) +var bufPool = &pool[*bytes.Buffer]{ + new: func() *bytes.Buffer { + return &bytes.Buffer{} + }, + reset: (*bytes.Buffer).Reset, } var null = []byte("null") func (q *Query) Sanitize(args ...any) (string, error) { argUse := make([]bool, len(args)) - buf := getBuf() - defer putBuf(buf) + buf := bufPool.get() + defer bufPool.put(buf) for _, part := range q.Parts { switch part := part.(type) { @@ -109,18 +100,39 @@ func (q *Query) Sanitize(args ...any) (string, error) { } func NewQuery(sql string) (*Query, error) { - l := &sqlLexer{ - src: sql, - stateFn: rawState, + query := &Query{} + query.init(sql) + + return query, nil +} + +var sqlLexerPool = &pool[*sqlLexer]{ + new: func() *sqlLexer { + return &sqlLexer{} + }, + reset: func(sl *sqlLexer) { + *sl = sqlLexer{} + }, +} + +func (q *Query) init(sql string) { + parts := q.Parts[:0] + if parts == nil { + n := strings.Count(sql, "$") + strings.Count(sql, "--") + 1 + parts = make([]Part, 0, n) } + l := sqlLexerPool.get() + defer sqlLexerPool.put(l) + l.src = sql + l.stateFn = rawState + l.parts = parts + for l.stateFn != nil { l.stateFn = l.stateFn(l) } - query := &Query{Parts: l.parts} - - return query, nil + q.Parts = l.parts } func QuoteString(str string) string { @@ -385,13 +397,42 @@ func multilineCommentState(l *sqlLexer) stateFn { } } +var queryPool = &pool[*Query]{ + new: func() *Query { + return &Query{} + }, + reset: func(q *Query) { + q.Parts = q.Parts[:0] + }, +} + // SanitizeSQL replaces placeholder values with args. It quotes and escapes args // as necessary. This function is only safe when standard_conforming_strings is // on. func SanitizeSQL(sql string, args ...any) (string, error) { - query, err := NewQuery(sql) - if err != nil { - return "", err - } + query := queryPool.get() + query.init(sql) + defer queryPool.put(query) + return query.Sanitize(args...) } + +type pool[E any] struct { + p sync.Pool + new func() E + reset func(E) +} + +func (pool *pool[E]) get() E { + v, ok := pool.p.Get().(E) + if !ok { + v = pool.new() + } + + return v +} + +func (p *pool[E]) put(v E) { + p.reset(v) + p.p.Put(v) +} From 59d6aa87b9e78e5f694854bf36143d13621c6b77 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 17:04:48 +0300 Subject: [PATCH 12/18] rework QuoteString and QuoteBytes as append-style --- internal/sanitize/sanitize.go | 16 ++++------------ internal/sanitize/sanitize_fuzz_test.go | 8 ++++---- internal/sanitize/sanitize_test.go | 4 ++-- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index 4aca2fb98..fd1e808b4 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -70,9 +70,9 @@ func (q *Query) Sanitize(args ...any) (string, error) { case bool: p = strconv.AppendBool(buf.AvailableBuffer(), arg) case []byte: - p = quoteBytes(buf.AvailableBuffer(), arg) + p = QuoteBytes(buf.AvailableBuffer(), arg) case string: - p = quoteString(buf.AvailableBuffer(), arg) + p = QuoteString(buf.AvailableBuffer(), arg) case time.Time: p = arg.Truncate(time.Microsecond). AppendFormat(buf.AvailableBuffer(), "'2006-01-02 15:04:05.999999999Z07:00:00'") @@ -135,11 +135,7 @@ func (q *Query) init(sql string) { q.Parts = l.parts } -func QuoteString(str string) string { - return string(quoteString(nil, str)) -} - -func quoteString(dst []byte, str string) []byte { +func QuoteString(dst []byte, str string) []byte { const quote = "'" n := strings.Count(str, quote) @@ -166,11 +162,7 @@ func quoteString(dst []byte, str string) []byte { return dst } -func QuoteBytes(buf []byte) string { - return string(quoteBytes(nil, buf)) -} - -func quoteBytes(dst, buf []byte) []byte { +func QuoteBytes(dst, buf []byte) []byte { dst = append(dst, `'\x`...) n := hex.EncodedLen(len(buf)) diff --git a/internal/sanitize/sanitize_fuzz_test.go b/internal/sanitize/sanitize_fuzz_test.go index 7d594def0..746558276 100644 --- a/internal/sanitize/sanitize_fuzz_test.go +++ b/internal/sanitize/sanitize_fuzz_test.go @@ -14,10 +14,10 @@ func FuzzQuoteString(f *testing.F) { f.Add("select 'quoted $42', $1") f.Fuzz(func(t *testing.T, input string) { - got := sanitize.QuoteString(input) + got := sanitize.QuoteString(nil, input) want := oldQuoteString(input) - if want != got { + if want != string(got) { t.Errorf("got %q", got) t.Fatalf("want %q", want) } @@ -32,10 +32,10 @@ func FuzzQuoteBytes(f *testing.F) { f.Add([]byte("select 'quoted $42', $1")) f.Fuzz(func(t *testing.T, input []byte) { - got := sanitize.QuoteBytes(input) + got := sanitize.QuoteBytes(nil, input) want := oldQuoteBytes(input) - if want != got { + if want != string(got) { t.Errorf("got %q", got) t.Fatalf("want %q", want) } diff --git a/internal/sanitize/sanitize_test.go b/internal/sanitize/sanitize_test.go index aafcd682d..9da701ea9 100644 --- a/internal/sanitize/sanitize_test.go +++ b/internal/sanitize/sanitize_test.go @@ -235,7 +235,7 @@ func TestQuoteString(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() - got := sanitize.QuoteString(input) + got := string(sanitize.QuoteString(nil, input)) want := oldQuoteString(input) if got != want { @@ -259,7 +259,7 @@ func TestQuoteBytes(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() - got := sanitize.QuoteBytes(input) + got := string(sanitize.QuoteBytes(nil, input)) want := oldQuoteBytes(input) if got != want { From 90a77b13b200afbfe64bae275df1ea6a55cb4aa3 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 17:15:38 +0300 Subject: [PATCH 13/18] add docs to sanitize tests --- internal/sanitize/sanitize_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/sanitize/sanitize_test.go b/internal/sanitize/sanitize_test.go index 9da701ea9..926751534 100644 --- a/internal/sanitize/sanitize_test.go +++ b/internal/sanitize/sanitize_test.go @@ -250,6 +250,8 @@ func TestQuoteString(t *testing.T) { tc("with quotes", `one's hat is always a cat`) } +// This function was used before optimizations. +// You should keep for testing purposes - we want to ensure there are no breaking changes. func oldQuoteString(str string) string { return "'" + strings.ReplaceAll(str, "'", "''") + "'" } @@ -274,6 +276,8 @@ func TestQuoteBytes(t *testing.T) { tc("text", []byte("abcd")) } +// This function was used before optimizations. +// You should keep for testing purposes - we want to ensure there are no breaking changes. func oldQuoteBytes(buf []byte) string { return `'\x` + hex.EncodeToString(buf) + "'" } From 47cbd8edb8b59458af8191dddb2cf1b0f96f4603 Mon Sep 17 00:00:00 2001 From: merlin Date: Tue, 1 Oct 2024 17:16:02 +0300 Subject: [PATCH 14/18] drop too large values from memory pools --- internal/sanitize/sanitize.go | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index fd1e808b4..173523d95 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -26,11 +26,17 @@ type Query struct { // https://github.com/jackc/pgx/issues/1380 const replacementcharacterwidth = 3 +const maxBufSize = 16384 // 16 Ki + var bufPool = &pool[*bytes.Buffer]{ new: func() *bytes.Buffer { return &bytes.Buffer{} }, - reset: (*bytes.Buffer).Reset, + reset: func(b *bytes.Buffer) bool { + n := b.Len() + b.Reset() + return n < maxBufSize + }, } var null = []byte("null") @@ -110,20 +116,23 @@ var sqlLexerPool = &pool[*sqlLexer]{ new: func() *sqlLexer { return &sqlLexer{} }, - reset: func(sl *sqlLexer) { + reset: func(sl *sqlLexer) bool { *sl = sqlLexer{} + return true }, } func (q *Query) init(sql string) { parts := q.Parts[:0] if parts == nil { + // dirty, but fast heuristic to preallocate for ~90% usecases n := strings.Count(sql, "$") + strings.Count(sql, "--") + 1 parts = make([]Part, 0, n) } l := sqlLexerPool.get() defer sqlLexerPool.put(l) + l.src = sql l.stateFn = rawState l.parts = parts @@ -393,8 +402,10 @@ var queryPool = &pool[*Query]{ new: func() *Query { return &Query{} }, - reset: func(q *Query) { + reset: func(q *Query) bool { + n := len(q.Parts) q.Parts = q.Parts[:0] + return n < 64 // drop too large queries }, } @@ -412,7 +423,7 @@ func SanitizeSQL(sql string, args ...any) (string, error) { type pool[E any] struct { p sync.Pool new func() E - reset func(E) + reset func(E) bool } func (pool *pool[E]) get() E { @@ -425,6 +436,7 @@ func (pool *pool[E]) get() E { } func (p *pool[E]) put(v E) { - p.reset(v) - p.p.Put(v) + if p.reset(v) { + p.p.Put(v) + } } From 057937db27165ec3f9a6b9cdaf9640ebda039914 Mon Sep 17 00:00:00 2001 From: merlin Date: Sun, 20 Oct 2024 18:00:59 +0300 Subject: [PATCH 15/18] add prefix to quoters tests --- internal/sanitize/sanitize_fuzz_test.go | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/internal/sanitize/sanitize_fuzz_test.go b/internal/sanitize/sanitize_fuzz_test.go index 746558276..a8f2e7791 100644 --- a/internal/sanitize/sanitize_fuzz_test.go +++ b/internal/sanitize/sanitize_fuzz_test.go @@ -7,17 +7,22 @@ import ( ) func FuzzQuoteString(f *testing.F) { - f.Add("") - f.Add("\n") + const prefix = "prefix" + f.Add("new\nline") f.Add("sample text") f.Add("sample q'u'o't'e's") f.Add("select 'quoted $42', $1") f.Fuzz(func(t *testing.T, input string) { - got := sanitize.QuoteString(nil, input) + got := string(sanitize.QuoteString([]byte(prefix), input)) want := oldQuoteString(input) - if want != string(got) { + quoted, ok := strings.CutPrefix(got, prefix) + if !ok { + t.Fatalf("result has no prefix") + } + + if want != quoted { t.Errorf("got %q", got) t.Fatalf("want %q", want) } @@ -25,6 +30,7 @@ func FuzzQuoteString(f *testing.F) { } func FuzzQuoteBytes(f *testing.F) { + const prefix = "prefix" f.Add([]byte(nil)) f.Add([]byte("\n")) f.Add([]byte("sample text")) @@ -32,10 +38,15 @@ func FuzzQuoteBytes(f *testing.F) { f.Add([]byte("select 'quoted $42', $1")) f.Fuzz(func(t *testing.T, input []byte) { - got := sanitize.QuoteBytes(nil, input) + got := string(sanitize.QuoteBytes([]byte(prefix), input)) want := oldQuoteBytes(input) - if want != string(got) { + quoted, ok := strings.CutPrefix(got, prefix) + if !ok { + t.Fatalf("result has no prefix") + } + + if want != quoted { t.Errorf("got %q", got) t.Fatalf("want %q", want) } From 120c89fe0dc063d8a078298f27953d258643e783 Mon Sep 17 00:00:00 2001 From: merlin Date: Sun, 20 Oct 2024 18:08:23 +0300 Subject: [PATCH 16/18] fix preallocations of quoted string --- internal/sanitize/sanitize.go | 2 +- internal/sanitize/sanitize_fuzz_test.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index 173523d95..e0ae9bedb 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -151,7 +151,7 @@ func QuoteString(dst []byte, str string) []byte { dst = append(dst, quote...) - p := slices.Grow(dst[len(dst):], len(str)+2*n) + p := slices.Grow(dst[len(dst):], 2*len(quote)+len(str)+2*n) for len(str) > 0 { i := strings.Index(str, quote) diff --git a/internal/sanitize/sanitize_fuzz_test.go b/internal/sanitize/sanitize_fuzz_test.go index a8f2e7791..2f0c41223 100644 --- a/internal/sanitize/sanitize_fuzz_test.go +++ b/internal/sanitize/sanitize_fuzz_test.go @@ -1,6 +1,7 @@ package sanitize_test import ( + "strings" "testing" "github.com/jackc/pgx/v5/internal/sanitize" From da0315d1a47fc13432afeb90596028ed6ca6cd9d Mon Sep 17 00:00:00 2001 From: merlin Date: Mon, 9 Dec 2024 16:33:57 +0200 Subject: [PATCH 17/18] optimisations of quote functions by @sean- --- internal/sanitize/benchmmark.sh | 3 +- internal/sanitize/sanitize.go | 62 +++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/internal/sanitize/benchmmark.sh b/internal/sanitize/benchmmark.sh index 06842c0aa..ec0f7b03a 100644 --- a/internal/sanitize/benchmmark.sh +++ b/internal/sanitize/benchmmark.sh @@ -43,7 +43,7 @@ for i in "${!commits[@]}"; do } # Sanitized commmit message - commit_message=$(git log -1 --pretty=format:"%s" | tr ' ' '_') + commit_message=$(git log -1 --pretty=format:"%s" | tr -c '[:alnum:]-_' '_') # Benchmark data will go there bench_file="${benchmarks_dir}/${i}_${commit_message}.bench" @@ -56,4 +56,5 @@ for i in "${!commits[@]}"; do bench_files+=("$bench_file") done +# go install golang.org/x/perf/cmd/benchstat[@latest] benchstat "${bench_files[@]}" diff --git a/internal/sanitize/sanitize.go b/internal/sanitize/sanitize.go index e0ae9bedb..b516817cb 100644 --- a/internal/sanitize/sanitize.go +++ b/internal/sanitize/sanitize.go @@ -145,41 +145,59 @@ func (q *Query) init(sql string) { } func QuoteString(dst []byte, str string) []byte { - const quote = "'" + const quote = '\'' - n := strings.Count(str, quote) + // Preallocate space for the worst case scenario + dst = slices.Grow(dst, len(str)*2+2) - dst = append(dst, quote...) + // Add opening quote + dst = append(dst, quote) - p := slices.Grow(dst[len(dst):], 2*len(quote)+len(str)+2*n) - - for len(str) > 0 { - i := strings.Index(str, quote) - if i < 0 { - p = append(p, str...) - break + // Iterate through the string without allocating + for i := 0; i < len(str); i++ { + if str[i] == quote { + dst = append(dst, quote, quote) + } else { + dst = append(dst, str[i]) } - p = append(p, str[:i]...) - p = append(p, "''"...) - str = str[i+1:] } - dst = append(dst, p...) - - dst = append(dst, quote...) + // Add closing quote + dst = append(dst, quote) return dst } func QuoteBytes(dst, buf []byte) []byte { - dst = append(dst, `'\x`...) + if len(buf) == 0 { + return append(dst, `'\x'`...) + } + + // Calculate required length + requiredLen := 3 + hex.EncodedLen(len(buf)) + 1 + + // Ensure dst has enough capacity + if cap(dst)-len(dst) < requiredLen { + newDst := make([]byte, len(dst), len(dst)+requiredLen) + copy(newDst, dst) + dst = newDst + } + + // Record original length and extend slice + origLen := len(dst) + dst = dst[:origLen+requiredLen] + + // Add prefix + dst[origLen] = '\'' + dst[origLen+1] = '\\' + dst[origLen+2] = 'x' + + // Encode bytes directly into dst + hex.Encode(dst[origLen+3:len(dst)-1], buf) - n := hex.EncodedLen(len(buf)) - p := slices.Grow(dst[len(dst):], n)[:n] - hex.Encode(p, buf) - dst = append(dst, p...) + // Add suffix + dst[len(dst)-1] = '\'' - dst = append(dst, `'`...) return dst } From e452f80b1d21846f931641099c2ef2c0a0873cf2 Mon Sep 17 00:00:00 2001 From: merlin Date: Sat, 28 Dec 2024 13:39:01 +0200 Subject: [PATCH 18/18] TestErrNoRows: remove bad test case --- conn_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/conn_test.go b/conn_test.go index 200ecc1a6..d51f829b9 100644 --- a/conn_test.go +++ b/conn_test.go @@ -1417,5 +1417,4 @@ func TestErrNoRows(t *testing.T) { require.Equal(t, "no rows in result set", pgx.ErrNoRows.Error()) require.ErrorIs(t, pgx.ErrNoRows, sql.ErrNoRows, "pgx.ErrNowRows must match sql.ErrNoRows") - require.ErrorIs(t, pgx.ErrNoRows, pgx.ErrNoRows, "sql.ErrNowRows must match pgx.ErrNoRows") }