diff --git a/go.mod b/go.mod index 95eb2788492..74b7d2fdcab 100644 --- a/go.mod +++ b/go.mod @@ -97,7 +97,7 @@ require ( github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver v0.102.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/opencensusreceiver v0.102.0 github.com/open-telemetry/opentelemetry-collector-contrib/receiver/zipkinreceiver v0.102.0 - github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe + github.com/parquet-go/parquet-go v0.24.0 github.com/stoewer/parquet-cli v0.0.9 go.opentelemetry.io/collector/config/configgrpc v0.102.1 go.opentelemetry.io/collector/config/confighttp v0.102.1 diff --git a/go.sum b/go.sum index aa1e1948d3a..afa1bfc6568 100644 --- a/go.sum +++ b/go.sum @@ -737,8 +737,8 @@ github.com/openzipkin/zipkin-go v0.4.3 h1:9EGwpqkgnwdEIJ+Od7QVSEIH+ocmm5nPat0G7s github.com/openzipkin/zipkin-go v0.4.3/go.mod h1:M9wCJZFWCo2RiY+o1eBCEMe0Dp2S5LDHcMZmk3RmK7c= github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe h1:oUJ5TPnrEK/z+/PeoLL+jCgfngAZIDMyhZASetRcYYg= -github.com/parquet-go/parquet-go v0.23.1-0.20241011155651-6446d1d0d2fe/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= +github.com/parquet-go/parquet-go v0.24.0 h1:VrsifmLPDnas8zpoHmYiWDZ1YHzLmc7NmNwPGkI2JM4= +github.com/parquet-go/parquet-go v0.24.0/go.mod h1:OqBBRGBl7+llplCvDMql8dEKaDqjaFA/VAPw+OJiNiw= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= diff --git a/vendor/github.com/parquet-go/parquet-go/README.md b/vendor/github.com/parquet-go/parquet-go/README.md index f0f0d8f5cf1..94bbdefad5b 100644 --- 
a/vendor/github.com/parquet-go/parquet-go/README.md +++ b/vendor/github.com/parquet-go/parquet-go/README.md @@ -1,10 +1,13 @@ -# parquet-go/parquet-go [![build status](https://github.com/parquet-go/parquet-go/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/parquet-go/parquet-go/actions) [![Go Report Card](https://goreportcard.com/badge/github.com/parquet-go/parquet-go)](https://goreportcard.com/report/github.com/parquet-go/parquet-go) [![Go Reference](https://pkg.go.dev/badge/github.com/parquet-go/parquet-go.svg)](https://pkg.go.dev/github.com/parquet-go/parquet-go) - -High-performance Go library to manipulate parquet files, initially developed at -[Twilio Segment](https://segment.com/engineering). - -![parquet-go-logo](https://github.com/parquet-go/parquet-go/assets/96151026/5b1f043b-2cee-4a64-a3c3-40d3353fecc0) - +
+ +
+

parquet-go/parquet-go

+

+High-performance Go library to manipulate parquet files, initially developed at +Twilio Segment. +

+ +
## Motivation @@ -35,7 +38,7 @@ using JSON or Protobuf. For more information, refer to the [Parquet Format Speci The package is distributed as a standard Go module that programs can take a dependency on and install with the following command: -``` +```bash go get github.com/parquet-go/parquet-go ``` @@ -289,11 +292,11 @@ defined by the sorting columns of the groups. There are a few constraints when merging row groups: -* The sorting columns of all the row groups must be the same, or the merge +- The sorting columns of all the row groups must be the same, or the merge operation must be explicitly configured a set of sorting columns which are a prefix of the sorting columns of all merged row groups. -* The schemas of row groups must all be equal, or the merge operation must +- The schemas of row groups must all be equal, or the merge operation must be explicitly configured with a schema that all row groups can be converted to, in which case the limitations of schema conversions apply. diff --git a/vendor/github.com/parquet-go/parquet-go/bloom.go b/vendor/github.com/parquet-go/parquet-go/bloom.go index 69d54c71712..30c64b84861 100644 --- a/vendor/github.com/parquet-go/parquet-go/bloom.go +++ b/vendor/github.com/parquet-go/parquet-go/bloom.go @@ -1,7 +1,6 @@ package parquet import ( - "encoding/binary" "io" "github.com/parquet-go/parquet-go/bloom" @@ -10,7 +9,6 @@ import ( "github.com/parquet-go/parquet-go/encoding" "github.com/parquet-go/parquet-go/format" "github.com/parquet-go/parquet-go/internal/unsafecast" - "golang.org/x/sys/cpu" ) // BloomFilter is an interface allowing applications to test whether a key @@ -174,18 +172,7 @@ func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { } func (e splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - buf := make([]byte, srcLen*12) - for idx := range srcLen { - binary.LittleEndian.PutUint32(buf[(idx*12):4+(idx*12)], 
uint32(src[idx][0])) - binary.LittleEndian.PutUint32(buf[4+(idx*12):8+(idx*12)], uint32(src[idx][1])) - binary.LittleEndian.PutUint32(buf[8+(idx*12):12+(idx*12)], uint32(src[idx][2])) - } - splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), buf, 12) - } else { - splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), unsafecast.Slice[byte](src), 12) - } + splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), unsafecastInt96ToBytes(src), 12) return dst, nil } diff --git a/vendor/github.com/parquet-go/parquet-go/bloom_be.go b/vendor/github.com/parquet-go/parquet-go/bloom_be.go new file mode 100644 index 00000000000..f7800301a68 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/bloom_be.go @@ -0,0 +1,19 @@ +//go:build s390x + +package parquet + +import ( + "encoding/binary" + + "github.com/parquet-go/parquet-go/deprecated" +) + +func unsafecastInt96ToBytes(src []deprecated.Int96) []byte { + out := make([]byte, len(src)*12) + for i := range src { + binary.LittleEndian.PutUint32(out[(i*12):4+(i*12)], uint32(src[i][0])) + binary.LittleEndian.PutUint32(out[4+(i*12):8+(i*12)], uint32(src[i][1])) + binary.LittleEndian.PutUint32(out[8+(i*12):12+(i*12)], uint32(src[i][2])) + } + return out +} diff --git a/vendor/github.com/parquet-go/parquet-go/bloom_le.go b/vendor/github.com/parquet-go/parquet-go/bloom_le.go new file mode 100644 index 00000000000..5b93bf07177 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/bloom_le.go @@ -0,0 +1,12 @@ +//go:build !s390x + +package parquet + +import ( + "github.com/parquet-go/parquet-go/deprecated" + "github.com/parquet-go/parquet-go/internal/unsafecast" +) + +func unsafecastInt96ToBytes(src []deprecated.Int96) []byte { + return unsafecast.Slice[byte](src) +} diff --git a/vendor/github.com/parquet-go/parquet-go/column_buffer.go b/vendor/github.com/parquet-go/parquet-go/column_buffer.go index 8435cb2977c..d1bc339862d 100644 --- 
a/vendor/github.com/parquet-go/parquet-go/column_buffer.go +++ b/vendor/github.com/parquet-go/parquet-go/column_buffer.go @@ -16,12 +16,8 @@ import ( "github.com/parquet-go/parquet-go/internal/bitpack" "github.com/parquet-go/parquet-go/internal/unsafecast" "github.com/parquet-go/parquet-go/sparse" - "golang.org/x/sys/cpu" ) -const offsetOfU64 = unsafe.Offsetof(Value{}.u64) -const offsetOfPtr = unsafe.Offsetof(Value{}.ptr) - // ColumnBuffer is an interface representing columns of a row group. // // ColumnBuffer implements sort.Interface as a way to support reordering the @@ -107,29 +103,6 @@ func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitio }, nil } -// On a big endian system, a boolean/byte value, which is in little endian byte format, is byte aligned -// to the 7th byte in a u64 (8 bytes) variable.. Hence the data will be available at 7th byte when -// interpreted as a little endian byte format. So, in order to access a boolean/byte value out of u64 variable, -// we need to add an offset of "7"... -// In the same way, an int32/uint32/float value, which is in little endian byte format, is byte aligned -// to the 4th byte in a u64 (8 bytes) variable.. Hence the data will be available at 4th byte when -// interpreted as a little endian byte format. 
So, in order to access an int32/uint32/float value out of u64 variable, -// we need to add an offset of "4" -func getOffset(colDict interface{}) uintptr { - var offset uintptr = 0 - - if cpu.IsBigEndian { - switch colDict.(type) { - case booleanColumnBuffer, booleanDictionary: - offset = 7 - - case int32ColumnBuffer, uint32ColumnBuffer, floatColumnBuffer, int32Dictionary, floatDictionary, uint32Dictionary: - offset = 4 - } - } - return offset -} - type nullableColumnIndex struct { ColumnIndex maxDefinitionLevel byte @@ -855,8 +828,7 @@ func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) { } func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfBool), columnLevels{}) return len(values), nil } @@ -995,8 +967,7 @@ func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) { } func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU32), columnLevels{}) return len(values), nil } @@ -1288,8 +1259,7 @@ func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) { } func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU32), columnLevels{}) return len(values), nil } @@ -1776,8 +1746,7 @@ func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) { } func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) { - offset := getOffset(*col) - col.writeValues(makeArrayValue(values, offsetOfU64+offset), columnLevels{}) + col.writeValues(makeArrayValue(values, offsetOfU32), columnLevels{}) 
return len(values), nil } diff --git a/vendor/github.com/parquet-go/parquet-go/dictionary.go b/vendor/github.com/parquet-go/parquet-go/dictionary.go index 9dce0ff6514..5ff0417b0f7 100644 --- a/vendor/github.com/parquet-go/parquet-go/dictionary.go +++ b/vendor/github.com/parquet-go/parquet-go/dictionary.go @@ -140,8 +140,7 @@ func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i) func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) } func (d *booleanDictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, makeArrayValue(values, offsetOfBool)) } func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { @@ -238,8 +237,7 @@ func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *int32Dictionary) index(i int32) int32 { return d.values[i] } func (d *int32Dictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, makeArrayValue(values, offsetOfU32)) } func (d *int32Dictionary) init(indexes []int32) { @@ -291,8 +289,7 @@ func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *int32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - offset := getOffset(*d) - d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.lookup(indexes, makeArrayValue(values, offsetOfU32)) } func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) { @@ -520,8 +517,7 @@ func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *floatDictionary) index(i int32) float32 { return d.values[i] } func (d *floatDictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, 
makeArrayValue(values, offsetOfU32)) } func (d *floatDictionary) init(indexes []int32) { @@ -560,8 +556,7 @@ func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) { func (d *floatDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - offset := getOffset(*d) - d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.lookup(indexes, makeArrayValue(values, offsetOfU32)) } func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) { @@ -930,8 +925,7 @@ func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] } func (d *uint32Dictionary) Insert(indexes []int32, values []Value) { - offset := getOffset(*d) - d.insert(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.insert(indexes, makeArrayValue(values, offsetOfU32)) } func (d *uint32Dictionary) init(indexes []int32) { @@ -970,8 +964,7 @@ func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) { func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) - offset := getOffset(*d) - d.lookup(indexes, makeArrayValue(values, offsetOfU64+offset)) + d.lookup(indexes, makeArrayValue(values, offsetOfU32)) } func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) { diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go index 8a58af5c39d..690bc815555 100644 --- a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain.go @@ -6,7 +6,6 @@ package plain import ( "encoding/binary" "fmt" - "golang.org/x/sys/cpu" "io" "math" @@ -37,70 +36,10 @@ func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) { return append(dst[:0], src...), nil } -func (e *Encoding) EncodeInt32(dst []byte, src 
[]int32) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 4)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], uint32((src)[k])) - idx += 4 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - -func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 8)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], uint64((src)[k])) - idx += 8 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { return append(dst[:0], unsafecast.Slice[byte](src)...), nil } -func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 4)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], math.Float32bits((src)[k])) - idx += 4 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - -func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { - if cpu.IsBigEndian { - srcLen := len(src) - byteEnc := make([]byte, (srcLen * 8)) - idx := 0 - for k := range srcLen { - binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], math.Float64bits((src)[k])) - idx += 8 - } - return append(dst[:0], (byteEnc)...), nil - } else { - return append(dst[:0], unsafecast.Slice[byte](src)...), nil - } -} - func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { dst = dst[:0] @@ -127,45 +66,6 @@ func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) { return append(dst[:0], src...), nil 
} -func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { - if (len(src) % 4) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) - } - - if cpu.IsBigEndian { - srcLen := (len(src) / 4) - byteDec := make([]int32, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = int32(binary.LittleEndian.Uint32((src)[idx:(4 + idx)])) - idx += 4 - } - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[int32](src)...), nil - } -} - -func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { - if (len(src) % 8) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) - } - - if cpu.IsBigEndian { - srcLen := (len(src) / 8) - byteDec := make([]int64, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = int64(binary.LittleEndian.Uint64((src)[idx:(8 + idx)])) - idx += 8 - } - - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[int64](src)...), nil - } -} - func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) { if (len(src) % 12) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src)) @@ -173,44 +73,6 @@ func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated return append(dst[:0], unsafecast.Slice[deprecated.Int96](src)...), nil } -func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { - if (len(src) % 4) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) - } - if cpu.IsBigEndian { - srcLen := (len(src) / 4) - byteDec := make([]float32, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = float32(math.Float32frombits(binary.LittleEndian.Uint32((src)[idx:(4 + idx)]))) - idx += 4 - } - - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[float32](src)...), nil - } -} - -func (e *Encoding) DecodeDouble(dst []float64, src 
[]byte) ([]float64, error) { - if (len(src) % 8) != 0 { - return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) - } - if cpu.IsBigEndian { - srcLen := (len(src) / 8) - byteDec := make([]float64, srcLen) - idx := 0 - for k := range srcLen { - byteDec[k] = float64(math.Float64frombits(binary.LittleEndian.Uint64((src)[idx:(8 + idx)]))) - idx += 8 - } - - return append(dst[:0], (byteDec)...), nil - } else { - return append(dst[:0], unsafecast.Slice[float64](src)...), nil - } -} - func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) { dst, offsets = dst[:0], offsets[:0] diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_be.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_be.go new file mode 100644 index 00000000000..6c8c9000b52 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_be.go @@ -0,0 +1,113 @@ +//go:build s390x + +package plain + +import ( + "encoding/binary" + "math" + + "github.com/parquet-go/parquet-go/encoding" +) + +// TODO: optimize by doing the byte swap in the output slice instead of +// allocating a temporary buffer. 
+ +func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], uint32((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], uint64((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 4)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint32(byteEnc[idx:(4+idx)], math.Float32bits((src)[k])) + idx += 4 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { + srcLen := len(src) + byteEnc := make([]byte, (srcLen * 8)) + idx := 0 + for k := range srcLen { + binary.LittleEndian.PutUint64(byteEnc[idx:(8+idx)], math.Float64bits((src)[k])) + idx += 8 + } + return append(dst[:0], (byteEnc)...), nil +} + +func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) + } + srcLen := (len(src) / 4) + byteDec := make([]int32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = int32(binary.LittleEndian.Uint32((src)[idx:(4 + idx)])) + idx += 4 + } + return append(dst[:0], (byteDec)...), nil +} + +func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) + } + srcLen := (len(src) / 8) + byteDec := make([]int64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = 
int64(binary.LittleEndian.Uint64((src)[idx:(8 + idx)])) + idx += 8 + } + return append(dst[:0], (byteDec)...), nil +} + +func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) + } + srcLen := (len(src) / 4) + byteDec := make([]float32, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float32(math.Float32frombits(binary.LittleEndian.Uint32((src)[idx:(4 + idx)]))) + idx += 4 + } + return append(dst[:0], (byteDec)...), nil +} + +func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) + } + srcLen := (len(src) / 8) + byteDec := make([]float64, srcLen) + idx := 0 + for k := range srcLen { + byteDec[k] = float64(math.Float64frombits(binary.LittleEndian.Uint64((src)[idx:(8 + idx)]))) + idx += 8 + } + return append(dst[:0], (byteDec)...), nil +} diff --git a/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_le.go b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_le.go new file mode 100644 index 00000000000..bd1eadf6a06 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/encoding/plain/plain_le.go @@ -0,0 +1,52 @@ +//go:build !s390x + +package plain + +import ( + "github.com/parquet-go/parquet-go/encoding" + "github.com/parquet-go/parquet-go/internal/unsafecast" +) + +func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { + return append(dst[:0], unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { + return append(dst[:0], 
unsafecast.Slice[byte](src)...), nil +} + +func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src)) + } + return append(dst[:0], unsafecast.Slice[int32](src)...), nil +} + +func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src)) + } + return append(dst[:0], unsafecast.Slice[int64](src)...), nil +} + +func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { + if (len(src) % 4) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) + } + return append(dst[:0], unsafecast.Slice[float32](src)...), nil +} + +func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) { + if (len(src) % 8) != 0 { + return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) + } + return append(dst[:0], unsafecast.Slice[float64](src)...), nil +} diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_be.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_be.go new file mode 100644 index 00000000000..0f4ba054c42 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_be.go @@ -0,0 +1,15 @@ +//go:build s390x + +package bitpack + +import "encoding/binary" + +func unsafecastBytesToUint32(src []byte) []uint32 { + out := make([]uint32, len(src)/4) + idx := 0 + for k := range out { + out[k] = binary.LittleEndian.Uint32((src)[idx:(4 + idx)]) + idx += 4 + } + return out +} diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_le.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_le.go new file mode 100644 index 00000000000..f754e704ff1 --- /dev/null +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_le.go @@ -0,0 +1,9 @@ +//go:build 
!s390x + +package bitpack + +import "github.com/parquet-go/parquet-go/internal/unsafecast" + +func unsafecastBytesToUint32(src []byte) []uint32 { + return unsafecast.Slice[uint32](src) +} diff --git a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go index cddbd773a51..1e65d8c02b9 100644 --- a/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go +++ b/vendor/github.com/parquet-go/parquet-go/internal/bitpack/unpack_int32_purego.go @@ -2,28 +2,8 @@ package bitpack -import ( - "encoding/binary" - - "golang.org/x/sys/cpu" - - "github.com/parquet-go/parquet-go/internal/unsafecast" -) - func unpackInt32(dst []int32, src []byte, bitWidth uint) { - var bits []uint32 - if cpu.IsBigEndian { - srcLen := (len(src) / 4) - bits = make([]uint32, srcLen) - idx := 0 - for k := range srcLen { - bits[k] = binary.LittleEndian.Uint32((src)[idx:(4 + idx)]) - idx += 4 - } - } else { - bits = unsafecast.Slice[uint32](src) - } - + bits := unsafecastBytesToUint32(src) bitMask := uint32(1<