From dfd5956e73a9f345c7e7f8e5b5a8df209ff926fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lovro=20Ma=C5=BEgon?= Date: Mon, 29 Jul 2024 20:49:24 +0200 Subject: [PATCH] feat: support for Go `int` encoding/decoding into/from Avro `long` (#422) --- README.md | 63 +++++++++++++++++++++++------------------- codec_native.go | 22 ++++++++++----- decoder_native_test.go | 26 +++++++++++++++-- encoder_native_test.go | 14 ++++++++++ 4 files changed, 86 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 7cc0671..90a8666 100644 --- a/README.md +++ b/README.md @@ -68,35 +68,40 @@ More examples in the [godoc](https://pkg.go.dev/github.com/hamba/avro/v2). #### Types Conversions -| Avro | Go Struct | Go Interface | -|-------------------------------|--------------------------------------------------------|--------------------------| -| `null` | `nil` | `nil` | -| `boolean` | `bool` | `bool` | -| `bytes` | `[]byte` | `[]byte` | -| `float` | `float32` | `float32` | -| `double` | `float64` | `float64` | -| `long` | `int64`, `uint32`\* | `int64`, `uint32` | -| `int` | `int`, `int32`, `int16`, `int8`, `uint8`\*, `uint16`\* | `int`, `uint8`, `uint16` | -| `fixed` | `uint64` | `uint64` | -| `string` | `string` | `string` | -| `array` | `[]T` | `[]any` | -| `enum` | `string` | `string` | -| `fixed` | `[n]byte` | `[n]byte` | -| `map` | `map[string]T{}` | `map[string]any` | -| `record` | `struct` | `map[string]any` | -| `union` | *see below* | *see below* | -| `int.date` | `time.Time` | `time.Time` | -| `int.time-millis` | `time.Duration` | `time.Duration` | -| `long.time-micros` | `time.Duration` | `time.Duration` | -| `long.timestamp-millis` | `time.Time` | `time.Time` | -| `long.timestamp-micros` | `time.Time` | `time.Time` | -| `long.local-timestamp-millis` | `time.Time` | `time.Time` | -| `long.local-timestamp-micros` | `time.Time` | `time.Time` | -| `bytes.decimal` | `*big.Rat` | `*big.Rat` | -| `fixed.decimal` | `*big.Rat` | `*big.Rat` | -| `string.uuid` | `string` | `string` | - -\* Please note that when the Go type is an unsigned integer care must be taken to ensure that information is not lost +| Avro | Go Struct | Go Interface | +|-------------------------------|------------------------------------------------------------|--------------------------| +| `null` | `nil` | `nil` | +| `boolean` | `bool` | `bool` | +| `bytes` | `[]byte` | `[]byte` | +| `float` | `float32` | `float32` | +| `double` | `float64` | `float64` | +| `long` | `int`\*, `int64`, `uint32`\** | `int`, `int64`, `uint32` | +| `int` | `int`\*, `int32`, `int16`, `int8`, `uint8`\**, `uint16`\** | `int`, `uint8`, `uint16` | +| `fixed` | `uint64` | `uint64` | +| `string` | `string` | `string` | +| `array` | `[]T` | `[]any` | +| `enum` | `string` | `string` | +| `fixed` | `[n]byte` | `[n]byte` | +| `map` | `map[string]T{}` | `map[string]any` | +| `record` | `struct` | `map[string]any` | +| `union` | *see below* | *see below* | +| `int.date` | `time.Time` | `time.Time` | +| `int.time-millis` | `time.Duration` | `time.Duration` | +| `long.time-micros` | `time.Duration` | `time.Duration` | +| `long.timestamp-millis` | `time.Time` | `time.Time` | +| `long.timestamp-micros` | `time.Time` | `time.Time` | +| `long.local-timestamp-millis` | `time.Time` | `time.Time` | +| `long.local-timestamp-micros` | `time.Time` | `time.Time` | +| `bytes.decimal` | `*big.Rat` | `*big.Rat` | +| `fixed.decimal` | `*big.Rat` | `*big.Rat` | +| `string.uuid` | `string` | `string` | + +\* Please note that the size of the Go type `int` is platform dependent. Decoding an Avro `long` into a Go `int` is +only allowed on 64-bit platforms and will result in an error on 32-bit platforms. Similarly, be careful when encoding a +Go `int` using Avro `int` on a 64-bit platform, as that can result in an integer overflow causing misinterpretation of +the data. + +\** Please note that when the Go type is an unsigned integer care must be taken to ensure that information is not lost when converting between the Avro type and Go type. For example, storing a *negative* number in Avro of `int = -100` would be interpreted as `uint16 = 65,436` in Go. Another example would be storing numbers in Avro `int = 256` that are larger than the Go type `uint8 = 0`. diff --git a/codec_native.go b/codec_native.go index fd809cc..3678de6 100644 --- a/codec_native.go +++ b/codec_native.go @@ -4,6 +4,7 @@ import ( "fmt" "math/big" "reflect" + "strconv" "time" "unsafe" @@ -21,10 +22,15 @@ func createDecoderOfNative(schema *PrimitiveSchema, typ reflect2.Type) ValDecode return &boolCodec{} case reflect.Int: - if schema.Type() != Int { - break + switch schema.Type() { + case Int: + return &intCodec[int]{} + case Long: + if strconv.IntSize == 64 { + // allow decoding into int when it's 64-bit + return &longCodec[int]{} + } } - return &intCodec[int]{} case reflect.Int8: if schema.Type() != Int { @@ -183,10 +189,12 @@ func createEncoderOfNative(schema Schema, typ reflect2.Type) ValEncoder { return &boolCodec{} case reflect.Int: - if schema.Type() != Int { - break + switch schema.Type() { + case Int: + return &intCodec[int]{} + case Long: + return &longCodec[int]{} } - return &intCodec[int]{} case reflect.Int8: if schema.Type() != Int { @@ -367,7 +375,7 @@ func (*intCodec[T]) Encode(ptr unsafe.Pointer, w *Writer) { } type largeInt interface { - ~int32 | ~uint32 | int64 + ~int | ~int32 | ~uint32 | int64 } type longCodec[T largeInt] struct{} diff --git a/decoder_native_test.go b/decoder_native_test.go index 2e538f1..a8aa773 100644 --- a/decoder_native_test.go +++ b/decoder_native_test.go @@ -3,6 +3,7 @@ package avro_test import ( "bytes" "math/big" + "strconv" "testing" "time" @@ -68,7 +69,7 @@ func TestDecoder_BoolEof(t *testing.T) { assert.Error(t, err) } -func TestDecoder_Int(t *testing.T) { +func TestDecoder_Int_Int(t *testing.T) { defer ConfigTeardown() data := []byte{0x36} @@ -83,6 +84,25 @@ func TestDecoder_Int(t *testing.T) { assert.Equal(t, 27, i) } +func TestDecoder_Int_Long(t *testing.T) { + if strconv.IntSize != 64 { + t.Skipf("int size is %d, skipping test", strconv.IntSize) + } + + defer ConfigTeardown() + + data := []byte{0x80, 0x80, 0x80, 0x80, 0x10} + schema := "long" + dec, err := avro.NewDecoder(schema, bytes.NewReader(data)) + require.NoError(t, err) + + var i int + err = dec.Decode(&i) + + require.NoError(t, err) + assert.Equal(t, 2147483648, i) +} + func TestDecoder_IntShortRead(t *testing.T) { defer ConfigTeardown() @@ -288,7 +308,7 @@ func TestDecoder_Uint32InvalidSchema(t *testing.T) { func TestDecoder_Int64(t *testing.T) { defer ConfigTeardown() - data := []byte{0x36} + data := []byte{0x80, 0x80, 0x80, 0x80, 0x10} schema := "long" dec, err := avro.NewDecoder(schema, bytes.NewReader(data)) require.NoError(t, err) @@ -297,7 +317,7 @@ func TestDecoder_Int64(t *testing.T) { err = dec.Decode(&i) require.NoError(t, err) - assert.Equal(t, int64(27), i) + assert.Equal(t, int64(2147483648), i) } func TestDecoder_Int64ShortRead(t *testing.T) { diff --git a/encoder_native_test.go b/encoder_native_test.go index 739b1fe..435e0e2 100644 --- a/encoder_native_test.go +++ b/encoder_native_test.go @@ -268,6 +268,20 @@ func TestEncoder_Int64FromInt32(t *testing.T) { assert.Equal(t, []byte{0x36}, buf.Bytes()) } +func TestEncoder_Int64FromInt(t *testing.T) { + defer ConfigTeardown() + + schema := "long" + buf := bytes.NewBuffer([]byte{}) + enc, err := avro.NewEncoder(schema, buf) + require.NoError(t, err) + + err = enc.Encode(2147483648) + + require.NoError(t, err) + assert.Equal(t, []byte{0x80, 0x80, 0x80, 0x80, 0x10}, buf.Bytes()) +} + func TestEncoder_Int64InvalidSchema(t *testing.T) { defer ConfigTeardown()