Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: optimise reading ints and longs #348

Merged
merged 1 commit into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 63 additions & 23 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,46 +139,86 @@ func (r *Reader) ReadBool() bool {

// ReadInt reads an Int from the Reader.
func (r *Reader) ReadInt() int32 {
var val uint32
var offset int8
if r.Error != nil {
return 0
}

var (
n int
v uint32
s uint8
)

for {
tail := r.tail
if r.tail-r.head+n > maxIntBufSize {
tail = r.head + maxIntBufSize - n
}

for r.Error == nil {
if offset == maxIntBufSize {
// Consume what is in the buffer.
for i, b := range r.buf[r.head:tail] {
v |= uint32(b&0x7f) << s
if b&0x80 == 0 {
r.head += i + 1
return int32((v >> 1) ^ -(v & 1))
}
s += 7
n++
}
if n >= maxIntBufSize {
r.ReportError("ReadInt", "int overflow")
return 0
}
r.head += n

b := r.readByte()
val |= uint32(b&0x7F) << uint(7*offset)
if b&0x80 == 0 {
break
// We ran out of buffer and are not at the end of the int,
// so read more into the buffer.
if !r.loadMore() {
return 0
}
offset++
}

return int32((val >> 1) ^ -(val & 1))
}

// ReadLong reads a Long from the Reader.
func (r *Reader) ReadLong() int64 {
var val uint64
var offset int8
if r.Error != nil {
return 0
}

var (
n int
v uint64
s uint8
)

for {
tail := r.tail
if r.tail-r.head+n > maxLongBufSize {
tail = r.head + maxLongBufSize - n
}

for r.Error == nil {
if offset == maxLongBufSize {
r.ReportError("ReadLong", "long overflow")
// Consume what is in the buffer.
for i, b := range r.buf[r.head:tail] {
v |= uint64(b&0x7f) << s
if b&0x80 == 0 {
r.head += i + 1
return int64((v >> 1) ^ -(v & 1))
}
s += 7
n++
}
if n >= maxLongBufSize {
r.ReportError("ReadLong", "int overflow")
return 0
}
r.head += n

b := r.readByte()
val |= uint64(b&0x7F) << uint(7*offset)
if b&0x80 == 0 {
break
// We ran out of buffer and are not at the end of the long,
// so read more into the buffer.
if !r.loadMore() {
return 0
}
offset++
}

return int64((val >> 1) ^ -(val & 1))
}

// ReadFloat reads a Float from the Reader.
Expand Down
20 changes: 6 additions & 14 deletions reader_skip.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,25 @@ func (r *Reader) SkipBool() {

// SkipInt skips an Int in the reader.
func (r *Reader) SkipInt() {
var offset int8
for r.Error == nil {
if offset == maxIntBufSize {
return
}

var n int
for r.Error == nil && n < maxIntBufSize {
b := r.readByte()
if b&0x80 == 0 {
break
}
offset++
n++
}
}

// SkipLong skips a Long in the reader.
func (r *Reader) SkipLong() {
var offset int8
for r.Error == nil {
if offset == maxLongBufSize {
return
}

var n int
for r.Error == nil && n < maxLongBufSize {
b := r.readByte()
if b&0x80 == 0 {
break
}
offset++
n++
}
}

Expand Down
56 changes: 34 additions & 22 deletions reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func TestReader_ReadInt(t *testing.T) {
},
{
name: "negative int",
data: []byte{0x0F},
data: []byte{0x0f},
want: -8,
wantErr: require.NoError,
},
Expand All @@ -183,7 +183,7 @@ func TestReader_ReadInt(t *testing.T) {
},
{
name: "negative 64",
data: []byte{0x7F},
data: []byte{0x7f},
want: -64,
wantErr: require.NoError,
},
Expand All @@ -195,34 +195,32 @@ func TestReader_ReadInt(t *testing.T) {
},
{
name: "large int",
data: []byte{0xAA, 0xB4, 0xDE, 0x75},
data: []byte{0xaa, 0xb4, 0xde, 0x75},
want: 123456789,
wantErr: require.NoError,
},
{
name: "larger int",
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0x07},
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0x07},
want: 987654321,
wantErr: require.NoError,
},
{
name: "overflow",
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0xAD, 0xAD},
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad},
want: 0,
wantErr: require.Error,
},
{
name: "eof",
data: []byte{0xE2},
want: 49,
data: []byte{0xe2},
want: 0,
wantErr: require.Error,
},
}

for _, test := range tests {
test := test
t.Run(test.name, func(t *testing.T) {

r := avro.NewReader(bytes.NewReader(test.data), 10)

got := r.ReadInt()
Expand All @@ -235,85 +233,99 @@ func TestReader_ReadInt(t *testing.T) {

func TestReader_ReadLong(t *testing.T) {
tests := []struct {
name string
data []byte
want int64
wantErr require.ErrorAssertionFunc
}{
{
name: "long",
data: []byte{0x36},
want: 27,
wantErr: require.NoError,
},
{
data: []byte{0x0F},
name: "negative long",
data: []byte{0x0f},
want: -8,
wantErr: require.NoError,
},
{
name: "negative long",
data: []byte{0x01},
want: -1,
wantErr: require.NoError,
},
{
name: "zero",
data: []byte{0x00},
want: 0,
wantErr: require.NoError,
},
{
name: "one",
data: []byte{0x02},
want: 1,
wantErr: require.NoError,
},
{
data: []byte{0x7F},
name: "negative 64",
data: []byte{0x7f},
want: -64,
wantErr: require.NoError,
},
{
name: "multi-byte",
data: []byte{0x80, 0x01},
want: 64,
wantErr: require.NoError,
},
{
data: []byte{0xAA, 0xB4, 0xDE, 0x75},
name: "large long",
data: []byte{0xaa, 0xb4, 0xde, 0x75},
want: 123456789,
wantErr: require.NoError,
},
{
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0x07},
name: "larger long",
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0x07},
want: 987654321,
wantErr: require.NoError,
},
{
data: []byte{0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01},
name: "very very big long",
data: []byte{0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
want: 9223372036854775807,
wantErr: require.NoError,
},
{
data: []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01},
name: "very very big negative long",
data: []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
want: -9223372036854775808,
wantErr: require.NoError,
},
{
data: []byte{0xBD, 0xB1, 0xAE, 0xD4, 0xD2, 0xCD, 0xBD, 0xE4, 0x97, 0x01},
name: "very very big negative long",
data: []byte{0xbd, 0xb1, 0xae, 0xd4, 0xd2, 0xcd, 0xbd, 0xe4, 0x97, 0x01},
want: -5468631321897454687,
wantErr: require.NoError,
},
{
data: []byte{0xE2, 0xA2, 0xF3, 0xAD, 0xAD, 0xAD, 0xE2, 0xA2, 0xF3, 0xAD, 0xAD}, // Overflow
name: "overflow",
data: []byte{0xe2, 0xa2, 0xf3, 0xad, 0xad, 0xad, 0xe2, 0xa2, 0xf3, 0xad, 0xad},
want: 0,
wantErr: require.Error,
},
{
data: []byte{0xE2}, // io.EOF
want: 49,
name: "eof",
data: []byte{0xe2},
want: 0,
wantErr: require.Error,
},
}

for i, test := range tests {
test := test
t.Run(strconv.Itoa(i), func(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := avro.NewReader(bytes.NewReader(test.data), 10)

got := r.ReadLong()
Expand Down
Loading