Commit

zstd: Minor performance tweaks (#420)
These seem better in Go 1.17
klauspost authored Jan 11, 2022
1 parent 469ba13 commit 4a32b53
Showing 5 changed files with 39 additions and 152 deletions.
11 changes: 5 additions & 6 deletions zstd/blockenc.go
@@ -51,7 +51,7 @@ func (b *blockEnc) init() {
if cap(b.literals) < maxCompressedBlockSize {
b.literals = make([]byte, 0, maxCompressedBlockSize)
}
- const defSeqs = 200
+ const defSeqs = 2000
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
@@ -426,7 +426,7 @@ func fuzzFseEncoder(data []byte) int {
return 0
}
enc := fseEncoder{}
- hist := enc.Histogram()[:256]
+ hist := enc.Histogram()
maxSym := uint8(0)
for i, v := range data {
v = v & 63
@@ -802,14 +802,13 @@ func (b *blockEnc) genCodes() {
// nothing to do
return
}
-
if len(b.sequences) > math.MaxUint16 {
panic("can only encode up to 64K sequences")
}
// No bounds checks after here:
- llH := b.coders.llEnc.Histogram()[:256]
- ofH := b.coders.ofEnc.Histogram()[:256]
- mlH := b.coders.mlEnc.Histogram()[:256]
+ llH := b.coders.llEnc.Histogram()
+ ofH := b.coders.ofEnc.Histogram()
+ mlH := b.coders.mlEnc.Histogram()
for i := range llH {
llH[i] = 0
}
24 changes: 17 additions & 7 deletions zstd/enc_base.go
@@ -108,11 +108,6 @@ func (e *fastBase) UseBlock(enc *blockEnc) {
e.blk = enc
}

- func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
- // Extend the match to be as long as possible.
- return int32(matchLen(src[s:], src[t:]))
- }
-
func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
if debugAsserts {
if s < 0 {
@@ -131,9 +126,24 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
+ a := src[s:]
+ b := src[t:]
+ b = b[:len(a)]
+ end := int32((len(a) >> 3) << 3)
+ for i := int32(0); i < end; i += 8 {
+ if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
+ return i + int32(bits.TrailingZeros64(diff)>>3)
+ }
+ }

- // Extend the match to be as long as possible.
- return int32(matchLen(src[s:], src[t:]))
+ a = a[end:]
+ b = b[end:]
+ for i := range a {
+ if a[i] != b[i] {
+ return int32(i) + end
+ }
+ }
+ return int32(len(a)) + end
}

// Reset the encoding table.
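The rewritten matchlen compares eight bytes at a time: it XORs two 64-bit loads from the candidate positions and, on the first non-zero result, counts trailing zero bits (divided by eight) to locate the first mismatching byte, then finishes the remaining 0-7 bytes with a plain loop. A minimal standalone sketch of the same trick, using encoding/binary in place of the package-internal load6432 helper (the real method also carries debug assertions and works on int32 offsets):

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

// matchLen returns the number of leading bytes that a and b have in common.
// It compares 8 bytes per iteration and uses TrailingZeros64 on the XOR of
// the two words to locate the first differing byte.
func matchLen(a, b []byte) int {
	if len(b) < len(a) {
		a = a[:len(b)]
	}
	b = b[:len(a)]
	// Round the length down to a multiple of 8 for the word-wise loop.
	end := (len(a) >> 3) << 3
	for i := 0; i < end; i += 8 {
		x := binary.LittleEndian.Uint64(a[i:])
		y := binary.LittleEndian.Uint64(b[i:])
		if diff := x ^ y; diff != 0 {
			// The lowest set bit of diff falls inside the first differing byte.
			return i + bits.TrailingZeros64(diff)>>3
		}
	}
	// Compare the remaining 0-7 tail bytes one at a time.
	for i := end; i < len(a); i++ {
		if a[i] != b[i] {
			return i
		}
	}
	return len(a)
}

func main() {
	fmt.Println(matchLen([]byte("hello world!"), []byte("hello woRld!"))) // 8
}
```

Because the loads are little-endian, the lowest differing bit always lands in the first differing byte, which is why TrailingZeros64(diff)>>3 gives the byte offset directly.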
139 changes: 9 additions & 130 deletions zstd/enc_fast.go
@@ -6,8 +6,6 @@ package zstd

import (
"fmt"
- "math"
- "math/bits"
)

const (
@@ -136,20 +134,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
- // length = 4 + e.matchlen(s+6, repIndex+4, src)
- {
- a := src[s+6:]
- b := src[repIndex+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- length = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
-
+ length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)

// We might be able to match backwards.
@@ -236,20 +221,7 @@ encodeLoop:
}

// Extend the 4-byte match as long as possible.
- //l := e.matchlen(s+4, t+4, src) + 4
- var l int32
- {
- a := src[s+4:]
- b := src[t+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- l = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ l := e.matchlen(s+4, t+4, src) + 4

// Extend backwards
tMin := s - e.maxMatchOff
@@ -286,20 +258,7 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
- //l := 4 + e.matchlen(s+4, o2+4, src)
- var l int32
- {
- a := src[s+4:]
- b := src[o2+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- l = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
@@ -418,21 +377,7 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
- // length := 4 + e.matchlen(s+6, repIndex+4, src)
- // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
- var length int32
- {
- a := src[s+6:]
- b := src[repIndex+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- length = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ length := 4 + e.matchlen(s+6, repIndex+4, src)

seq.matchLen = uint32(length - zstdMinMatch)

@@ -522,21 +467,7 @@ encodeLoop:
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
- //l := e.matchlenNoHist(s+4, t+4, src) + 4
- // l := int32(matchLen(src[s+4:], src[t+4:])) + 4
- var l int32
- {
- a := src[s+4:]
- b := src[t+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- l = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ l := e.matchlen(s+4, t+4, src) + 4

// Extend backwards
tMin := s - e.maxMatchOff
@@ -573,21 +504,7 @@ encodeLoop:
if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
- //l := 4 + e.matchlenNoHist(s+4, o2+4, src)
- // l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
- var l int32
- {
- a := src[s+4:]
- b := src[o2+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- l = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
@@ -731,19 +648,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
- // length = 4 + e.matchlen(s+6, repIndex+4, src)
- {
- a := src[s+6:]
- b := src[repIndex+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- length = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ length = 4 + e.matchlen(s+6, repIndex+4, src)

seq.matchLen = uint32(length - zstdMinMatch)

@@ -831,20 +736,7 @@ encodeLoop:
}

// Extend the 4-byte match as long as possible.
- //l := e.matchlen(s+4, t+4, src) + 4
- var l int32
- {
- a := src[s+4:]
- b := src[t+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- l = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ l := e.matchlen(s+4, t+4, src) + 4

// Extend backwards
tMin := s - e.maxMatchOff
@@ -881,20 +773,7 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
- //l := 4 + e.matchlen(s+4, o2+4, src)
- var l int32
- {
- a := src[s+4:]
- b := src[o2+4:]
- endI := len(a) & (math.MaxInt32 - 7)
- l = int32(endI) + 4
- for i := 0; i < endI; i += 8 {
- if diff := load64(a, i) ^ load64(b, i); diff != 0 {
- l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
- break
- }
- }
- }
+ l := 4 + e.matchlen(s+4, o2+4, src)

// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
12 changes: 6 additions & 6 deletions zstd/encoder_test.go
@@ -51,7 +51,7 @@ func getEncOpts(cMax int) []testEncOpt {
addOpt("nolit", WithNoEntropyCompression(true))
addOpt("pad1k", WithEncoderPadding(1024))
addOpt("zerof", WithZeroFrames(true))
- addOpt("singleseg", WithSingleSegment(true))
+ addOpt("1seg", WithSingleSegment(true))
}
if testing.Short() && conc == 2 {
break
@@ -904,10 +904,10 @@ func BenchmarkEncoder_EncodeAllXML(b *testing.B) {
}
dec.Close()

- enc := Encoder{}
+ enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
- b.Log("Output size:", len(dst))
+ //b.Log("Output size:", len(dst))
b.ResetTimer()
b.ReportAllocs()
b.SetBytes(int64(len(in)))
@@ -994,7 +994,7 @@ func BenchmarkEncoder_EncodeAllHTML(b *testing.B) {
b.Fatal(err)
}

- enc := Encoder{}
+ enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
b.ResetTimer()
@@ -1018,7 +1018,7 @@ func BenchmarkEncoder_EncodeAllTwain(b *testing.B) {
b.Fatal(err)
}

- enc := Encoder{}
+ enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
b.ResetTimer()
@@ -1042,7 +1042,7 @@ func BenchmarkEncoder_EncodeAllPi(b *testing.B) {
b.Fatal(err)
}

- enc := Encoder{}
+ enc, _ := NewWriter(nil, WithEncoderConcurrency(1))
dst := enc.EncodeAll(in, nil)
wantSize := len(dst)
b.ResetTimer()
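The benchmarks now obtain the encoder from NewWriter with concurrency pinned to one goroutine instead of relying on a zero-value Encoder, so each iteration measures a single-threaded EncodeAll call. A rough sketch of that setup outside the test file (the input data here is illustrative):

```go
package main

import (
	"fmt"
	"log"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Single-goroutine encoder, as the benchmarks now construct it.
	enc, err := zstd.NewWriter(nil, zstd.WithEncoderConcurrency(1))
	if err != nil {
		log.Fatal(err)
	}
	defer enc.Close()

	in := []byte("some input that should compress reasonably well well well well")
	// EncodeAll compresses in and appends the resulting frame to the (nil) destination.
	dst := enc.EncodeAll(in, nil)
	fmt.Printf("compressed %d bytes to %d bytes\n", len(in), len(dst))
}
```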
5 changes: 2 additions & 3 deletions zstd/fse_encoder.go
@@ -62,9 +62,8 @@ func (s symbolTransform) String() string {
// To indicate that you have populated the histogram call HistogramFinished
// with the value of the highest populated symbol, as well as the number of entries
// in the most populated entry. These are accepted at face value.
- // The returned slice will always be length 256.
- func (s *fseEncoder) Histogram() []uint32 {
- return s.count[:]
+ func (s *fseEncoder) Histogram() *[256]uint32 {
+ return &s.count
}

// HistogramFinished can be called to indicate that the histogram has been populated.
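Returning a pointer to the fixed-size count array instead of a 256-element slice makes the length part of the type: indexing a *[256]uint32 with a byte value provably cannot go out of bounds, so the compiler can drop the checks that the earlier Histogram()[:256] idiom was only hinting at. A small standalone sketch of the pattern (a hypothetical histogram type, not the library's):

```go
package main

import "fmt"

// histogram holds one counter per possible byte value.
type histogram struct {
	count [256]uint32
}

// Counts returns a pointer to the fixed-size array. Because the length 256
// is part of the type, indexing it with a uint8 needs no bounds check.
func (h *histogram) Counts() *[256]uint32 {
	return &h.count
}

func main() {
	var h histogram
	counts := h.Counts()
	for _, b := range []byte("abracadabra") {
		counts[b]++ // b is a uint8, always < 256: no bounds check needed
	}
	fmt.Println(counts['a'], counts['b'], counts['r']) // 5 2 2
}
```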
