From 41d0d7c3fe4f0422d8671b0f6d1df84ca7935bd8 Mon Sep 17 00:00:00 2001 From: Oleg Kovalov Date: Thu, 20 Jun 2024 13:12:31 +0200 Subject: [PATCH] perf: faster hash marshaling methods (#183) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce a bit of allocations and cpu time in Hash marshaling methods. Same behaviour as before but slightly in another way: instead of operations on string we can do this on []byte. As a result we now have tests for that. ``` go-header % go-perftuner bstat a.txt b.txt args: [a.txt b.txt]name old time/op new time/op delta HashMarshaling/String-10 798ns ± 0% 613ns ± 0% -23.08% (p=0.002 n=6+6) HashMarshaling/Marshal-10 1.11µs ± 0% 0.84µs ± 0% -24.55% (p=0.004 n=5+6) HashMarshaling/Unmarshal-10 333ns ±11% 277ns ± 1% -16.92% (p=0.004 n=5+6) name old alloc/op new alloc/op delta HashMarshaling/String-10 192B ± 0% 128B ± 0% -33.33% (p=0.002 n=6+6) HashMarshaling/Marshal-10 296B ± 0% 104B ± 0% -64.86% (p=0.002 n=6+6) HashMarshaling/Unmarshal-10 128B ± 0% 32B ± 0% -75.00% (p=0.002 n=6+6) name old allocs/op new allocs/op delta HashMarshaling/String-10 3.00 ± 0% 2.00 ± 0% ~ (p=0.002 n=6+6) HashMarshaling/Marshal-10 5.00 ± 0% 2.00 ± 0% -60.00% (p=0.002 n=6+6) HashMarshaling/Unmarshal-10 2.00 ± 0% 1.00 ± 0% ~ (p=0.002 n=6+6) ``` --- hash.go | 35 ++++++++++++++++++------- hash_test.go | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 10 deletions(-) create mode 100644 hash_test.go diff --git a/hash.go b/hash.go index 156cd2bb..88c60138 100644 --- a/hash.go +++ b/hash.go @@ -3,7 +3,6 @@ package header import ( "encoding/hex" "fmt" - "strings" ) // Hash represents cryptographic hash and provides basic serialization functions. @@ -11,17 +10,20 @@ type Hash []byte // String implements fmt.Stringer interface. 
func (h Hash) String() string { - return strings.ToUpper(hex.EncodeToString(h)) + buf := make([]byte, hex.EncodedLen(len(h))) + hex.Encode(buf, h) + hexToUpper(buf) + return string(buf) } // MarshalJSON serializes Hash into valid JSON. func (h Hash) MarshalJSON() ([]byte, error) { - s := strings.ToUpper(hex.EncodeToString(h)) - jbz := make([]byte, len(s)+2) - jbz[0] = '"' - copy(jbz[1:], s) - jbz[len(jbz)-1] = '"' - return jbz, nil + buf := make([]byte, 2+hex.EncodedLen(len(h))) + buf[0] = '"' + hex.Encode(buf[1:], h) + hexToUpper(buf) + buf[len(buf)-1] = '"' + return buf, nil } // UnmarshalJSON deserializes JSON representation of a Hash into object. @@ -29,10 +31,23 @@ func (h *Hash) UnmarshalJSON(data []byte) error { if len(data) < 2 || data[0] != '"' || data[len(data)-1] != '"' { return fmt.Errorf("invalid hex string: %s", data) } - bz2, err := hex.DecodeString(string(data[1 : len(data)-1])) + + buf := make([]byte, hex.DecodedLen(len(data)-2)) + _, err := hex.Decode(buf, data[1:len(data)-1]) if err != nil { return err } - *h = bz2 + *h = buf return nil } + +// because we encode hex (alphabet: 0-9a-f) we can do this inplace. 
+func hexToUpper(b []byte) { + for i := 0; i < len(b); i++ { + c := b[i] + if 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + b[i] = c + } +} diff --git a/hash_test.go b/hash_test.go new file mode 100644 index 00000000..04ce9e86 --- /dev/null +++ b/hash_test.go @@ -0,0 +1,74 @@ +package header + +import ( + "crypto/rand" + "crypto/sha256" + "encoding/hex" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestHash(t *testing.T) { + h := randHash() + + buf, err := h.MarshalJSON() + require.NoError(t, err) + + var h2 Hash + err = h2.UnmarshalJSON(buf) + require.NoError(t, err) + + require.Equal(t, h.String(), h2.String()) +} + +func BenchmarkHashMarshaling(b *testing.B) { + h := randHash() + + golden, err := h.MarshalJSON() + require.NoError(b, err) + + b.ResetTimer() + + b.Run("String", func(b *testing.B) { + wantSize := hex.EncodedLen(len(h)) + + for i := 0; i < b.N; i++ { + ln := len(h.String()) + require.Equal(b, ln, wantSize) + } + }) + + b.Run("Marshal", func(b *testing.B) { + for i := 0; i < b.N; i++ { + buf, err := h.MarshalJSON() + require.NoError(b, err) + require.NotZero(b, buf) + } + }) + + b.Run("Unmarshal", func(b *testing.B) { + var h2 Hash + + for i := 0; i < b.N; i++ { + err := h2.UnmarshalJSON(golden) + require.NoError(b, err) + } + }) +} + +func Fuzz_hexToUpper(f *testing.F) { + f.Add([]byte("48656c6c6f20476f7068657221")) + + f.Fuzz(func(t *testing.T, buf []byte) { + hexToUpper(buf) + }) +} + +func randHash() Hash { + var buf [sha256.Size]byte + if _, err := rand.Read(buf[:]); err != nil { + panic(err) + } + return Hash(buf[:]) +}