Skip to content

Commit

Permalink
Use short emit and reverse indexing order.
Browse files Browse the repository at this point in the history
  • Loading branch information
klauspost committed Jun 24, 2024
1 parent 5338c23 commit d470d00
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 5 deletions.
3 changes: 2 additions & 1 deletion s2/dict.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ func (d *Dict) initROLZ() {
for c := range d.rolzTab {
filled := 0
nextEntry:
for i := range d.dict[:len(d.dict)-4] {
for i := len(d.dict) - 4; i >= 0; i-- {
//for i := range d.dict[:len(d.dict)-4] {
if binary.LittleEndian.Uint16(d.dict[i:]) == uint16(c) {
// Don't fill the same 2 bytes several times.
const matchMask = (1 << (4 * 8)) - 1
Expand Down
73 changes: 69 additions & 4 deletions s2/encode_best.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
score := m.length - m.s
if m.rolz {
// One byte to emit.
return score - 1 - emitRepeatSize(65536, m.length-10)
return score - 1 - emitRepeatShortSize(m.length-10)
}
if nextEmit == m.s {
// If we do not have to emit literals, we save 1 byte
Expand Down Expand Up @@ -301,9 +301,11 @@ func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
//if nextEmit < s && s >= 2 {
if s >= 2 {
best = bestOf(best, matchDictROLZ(s-2, load32(src, s-2)))
// Enable ROLZ after copy. Requires different encoding.
if true || nextEmit < s-2 {
best = bestOf(best, matchDictROLZ(s-2, load32(src, s-2)))
}
best = bestOf(best, matchDictROLZ(s-1, load32(src, s-1)))
}
best = bestOf(best, matchDictROLZ(s, uint32(cv)))
Expand Down Expand Up @@ -523,7 +525,7 @@ emitRemainder:
func emitROLZ(d []byte, idx uint8, length int) int {
if length > 10 {
d[0] = uint8(idx<<5) | (0x7 << 2)
return emitRepeat(d[1:], -1, length-10) + 1
return emitRepeatShort(d[1:], length-10) + 1
}
d[0] = idx<<5 | uint8(length-3)<<2
return 1
Expand Down Expand Up @@ -879,3 +881,66 @@ func emitRepeatSize(offset, length int) int {
}
return 5
}

func emitRepeatShort(dst []byte, length int) int {
// Repeat offset, make length cheaper
const debugEncode = false
const tagRepeat = 3 | 4
if debugEncode {
fmt.Println("(repeat)", length)
}
if length <= 0 {
return 0
}
length--
if debugEncode && length < 0 {
panic(fmt.Sprintf("invalid length %d", length))
}
if length <= 28 {
dst[0] = uint8(length)<<3 | tagRepeat
return 1
}
length -= 28
if length < 256 {
dst[1] = uint8(length >> 0)
dst[0] = 29<<3 | tagRepeat
return 2
}

if length < 65536 {
dst[2] = uint8(length >> 8)
dst[1] = uint8(length >> 0)
dst[0] = 30<<3 | tagRepeat
return 3
}
dst[3] = uint8(length >> 16)
dst[2] = uint8(length >> 8)
dst[1] = uint8(length >> 0)
dst[0] = 31<<3 | tagRepeat
return 4
}

// emitRepeatShortSize returns the number of bytes required to encode a repeat.
// Length must be at least 1 and < 1<<24
func emitRepeatShortSize(length int) int {
if length <= 0 {
return 0
}

if length <= 29 {
return 1
}
length -= 29
if length <= 256 {
return 2
}
if length <= 65536 {
return 3
}
const maxRepeat = (1 << 24) - 1
left := 0
if length > maxRepeat {
left = length - maxRepeat
}
return 4 + emitRepeatShortSize(left)
}

0 comments on commit d470d00

Please sign in to comment.