Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for Go int encoding/decoding into/from Avro long #422

Merged
merged 2 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 34 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,35 +68,40 @@ More examples in the [godoc](https://pkg.go.dev/github.com/hamba/avro/v2).

#### Types Conversions

| Avro | Go Struct | Go Interface |
|-------------------------------|--------------------------------------------------------|--------------------------|
| `null` | `nil` | `nil` |
| `boolean` | `bool` | `bool` |
| `bytes` | `[]byte` | `[]byte` |
| `float` | `float32` | `float32` |
| `double` | `float64` | `float64` |
| `long` | `int64`, `uint32`\* | `int64`, `uint32` |
| `int` | `int`, `int32`, `int16`, `int8`, `uint8`\*, `uint16`\* | `int`, `uint8`, `uint16` |
| `fixed` | `uint64` | `uint64` |
| `string` | `string` | `string` |
| `array` | `[]T` | `[]any` |
| `enum` | `string` | `string` |
| `fixed` | `[n]byte` | `[n]byte` |
| `map` | `map[string]T{}` | `map[string]any` |
| `record` | `struct` | `map[string]any` |
| `union` | *see below* | *see below* |
| `int.date` | `time.Time` | `time.Time` |
| `int.time-millis` | `time.Duration` | `time.Duration` |
| `long.time-micros` | `time.Duration` | `time.Duration` |
| `long.timestamp-millis` | `time.Time` | `time.Time` |
| `long.timestamp-micros` | `time.Time` | `time.Time` |
| `long.local-timestamp-millis` | `time.Time` | `time.Time` |
| `long.local-timestamp-micros` | `time.Time` | `time.Time` |
| `bytes.decimal` | `*big.Rat` | `*big.Rat` |
| `fixed.decimal` | `*big.Rat` | `*big.Rat` |
| `string.uuid` | `string` | `string` |

\* Please note that when the Go type is an unsigned integer care must be taken to ensure that information is not lost
| Avro | Go Struct | Go Interface |
|-------------------------------|------------------------------------------------------------|--------------------------|
| `null` | `nil` | `nil` |
| `boolean` | `bool` | `bool` |
| `bytes` | `[]byte` | `[]byte` |
| `float` | `float32` | `float32` |
| `double` | `float64` | `float64` |
| `long` | `int`\*, `int64`, `uint32`\*\* | `int`, `int64`, `uint32` |
| `int` | `int`\*, `int32`, `int16`, `int8`, `uint8`\*\*, `uint16`\*\* | `int`, `uint8`, `uint16` |
| `fixed` | `uint64` | `uint64` |
| `string` | `string` | `string` |
| `array` | `[]T` | `[]any` |
| `enum` | `string` | `string` |
| `fixed` | `[n]byte` | `[n]byte` |
| `map` | `map[string]T{}` | `map[string]any` |
| `record` | `struct` | `map[string]any` |
| `union` | *see below* | *see below* |
| `int.date` | `time.Time` | `time.Time` |
| `int.time-millis` | `time.Duration` | `time.Duration` |
| `long.time-micros` | `time.Duration` | `time.Duration` |
| `long.timestamp-millis` | `time.Time` | `time.Time` |
| `long.timestamp-micros` | `time.Time` | `time.Time` |
| `long.local-timestamp-millis` | `time.Time` | `time.Time` |
| `long.local-timestamp-micros` | `time.Time` | `time.Time` |
| `bytes.decimal` | `*big.Rat` | `*big.Rat` |
| `fixed.decimal` | `*big.Rat` | `*big.Rat` |
| `string.uuid` | `string` | `string` |

\* Please note that the size of the Go type `int` is platform dependent. Decoding an Avro `long` into a Go `int` is
only allowed on 64-bit platforms and will result in an error on 32-bit platforms. Similarly, be careful when encoding a
Go `int` using Avro `int` on a 64-bit platform, as that can result in an integer overflow causing misinterpretation of
the data.

\*\* Please note that when the Go type is an unsigned integer, care must be taken to ensure that information is not lost
when converting between the Avro type and Go type. For example, a *negative* number stored in Avro as `int = -100`
would be interpreted as `uint16 = 65,436` in Go. Likewise, a number stored in Avro that is too large for the Go type
wraps around: `int = 256` would be interpreted as `uint8 = 0` in Go.
Expand Down
22 changes: 15 additions & 7 deletions codec_native.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"math/big"
"reflect"
"strconv"
"time"
"unsafe"

Expand All @@ -21,10 +22,15 @@ func createDecoderOfNative(schema *PrimitiveSchema, typ reflect2.Type) ValDecode
return &boolCodec{}

case reflect.Int:
if schema.Type() != Int {
break
switch schema.Type() {
case Int:
return &intCodec[int]{}
case Long:
if strconv.IntSize == 64 {
// allow decoding into int when it's 64-bit
return &longCodec[int]{}
}
}
return &intCodec[int]{}

case reflect.Int8:
if schema.Type() != Int {
Expand Down Expand Up @@ -183,10 +189,12 @@ func createEncoderOfNative(schema Schema, typ reflect2.Type) ValEncoder {
return &boolCodec{}

case reflect.Int:
if schema.Type() != Int {
break
switch schema.Type() {
case Int:
return &intCodec[int]{}
case Long:
return &longCodec[int]{}
}
return &intCodec[int]{}

case reflect.Int8:
if schema.Type() != Int {
Expand Down Expand Up @@ -367,7 +375,7 @@ func (*intCodec[T]) Encode(ptr unsafe.Pointer, w *Writer) {
}

type largeInt interface {
~int32 | ~uint32 | int64
~int | ~int32 | ~uint32 | int64
}

type longCodec[T largeInt] struct{}
Expand Down
26 changes: 23 additions & 3 deletions decoder_native_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package avro_test
import (
"bytes"
"math/big"
"strconv"
"testing"
"time"

Expand Down Expand Up @@ -68,7 +69,7 @@ func TestDecoder_BoolEof(t *testing.T) {
assert.Error(t, err)
}

func TestDecoder_Int(t *testing.T) {
func TestDecoder_Int_Int(t *testing.T) {
defer ConfigTeardown()

data := []byte{0x36}
Expand All @@ -83,6 +84,25 @@ func TestDecoder_Int(t *testing.T) {
assert.Equal(t, 27, i)
}

// TestDecoder_Int_Long verifies that an Avro long can be decoded into a Go
// int. The test is skipped on platforms where int is not 64 bits wide.
func TestDecoder_Int_Long(t *testing.T) {
	if strconv.IntSize != 64 {
		t.Skipf("int size is %d, skipping test", strconv.IntSize)
	}

	defer ConfigTeardown()

	// Zig-zag varint encoding of 2147483648 (one past the int32 maximum).
	data := []byte{0x80, 0x80, 0x80, 0x80, 0x10}
	schema := "long"
	dec, err := avro.NewDecoder(schema, bytes.NewReader(data))
	require.NoError(t, err)

	var i int
	err = dec.Decode(&i)

	require.NoError(t, err)
	// Compare as int64: a bare 2147483648 passed to an interface parameter
	// defaults to int, which overflows at compile time on 32-bit targets
	// and would break the build before the skip above could take effect.
	assert.Equal(t, int64(2147483648), int64(i))
}

func TestDecoder_IntShortRead(t *testing.T) {
defer ConfigTeardown()

Expand Down Expand Up @@ -288,7 +308,7 @@ func TestDecoder_Uint32InvalidSchema(t *testing.T) {
func TestDecoder_Int64(t *testing.T) {
defer ConfigTeardown()

data := []byte{0x36}
data := []byte{0x80, 0x80, 0x80, 0x80, 0x10}
schema := "long"
dec, err := avro.NewDecoder(schema, bytes.NewReader(data))
require.NoError(t, err)
Expand All @@ -297,7 +317,7 @@ func TestDecoder_Int64(t *testing.T) {
err = dec.Decode(&i)

require.NoError(t, err)
assert.Equal(t, int64(27), i)
assert.Equal(t, int64(2147483648), i)
}

func TestDecoder_Int64ShortRead(t *testing.T) {
Expand Down
14 changes: 14 additions & 0 deletions encoder_native_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,20 @@ func TestEncoder_Int64FromInt32(t *testing.T) {
assert.Equal(t, []byte{0x36}, buf.Bytes())
}

// TestEncoder_Int64FromInt verifies that a Go int holding a value beyond the
// int32 range is encoded correctly under an Avro long schema. The test is
// skipped on platforms whose int cannot represent the value.
func TestEncoder_Int64FromInt(t *testing.T) {
	// Keep the value in an int64 variable and convert at run time: the bare
	// constant 2147483648 defaults to int when passed to an interface
	// parameter, which is a compile error on 32-bit targets.
	var want int64 = 2147483648
	val := int(want)
	if int64(val) != want {
		t.Skipf("int cannot represent %d on this platform, skipping test", want)
	}

	defer ConfigTeardown()

	schema := "long"
	buf := bytes.NewBuffer([]byte{})
	enc, err := avro.NewEncoder(schema, buf)
	require.NoError(t, err)

	err = enc.Encode(val)

	require.NoError(t, err)
	// Zig-zag varint encoding of 2147483648.
	assert.Equal(t, []byte{0x80, 0x80, 0x80, 0x80, 0x10}, buf.Bytes())
}

func TestEncoder_Int64InvalidSchema(t *testing.T) {
defer ConfigTeardown()

Expand Down
Loading