Skip to content

Commit

Permalink
add doc comments, better error msg, more test coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
XiangpengHao committed Jun 20, 2024
1 parent 6063832 commit e46a741
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 6 deletions.
2 changes: 1 addition & 1 deletion arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ where
FROM: ByteArrayType,
FROM::Offset: OffsetSizeTrait + ToPrimitive,
V: ByteViewType,
FROM::Native: PartialEq<V::Native>, // this prevent users to convert between byte view and string views.
FROM::Native: PartialEq<V::Native>, // this prevents users from converting between byte and string types.
{
fn from(value: &GenericByteArray<FROM>) -> Self {
let byte_array = value;
Expand Down
22 changes: 18 additions & 4 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1102,7 +1102,7 @@ fn row_lengths(cols: &[ArrayRef], encoders: &[Encoder]) -> Vec<usize> {
let len = len.to_usize().unwrap();
lengths.iter_mut().for_each(|x| *x += 1 + len)
}
_ => unreachable!(),
_ => unimplemented!("unsupported data type: {}", array.data_type()),
}
}
Encoder::Dictionary(values, null) => {
Expand Down Expand Up @@ -1186,7 +1186,7 @@ fn encode_column(
let array = column.as_any().downcast_ref().unwrap();
fixed::encode_fixed_size_binary(data, offsets, array, opts)
}
_ => unreachable!(),
_ => unimplemented!("unsupported data type: {}", column.data_type()),
}
}
Encoder::Dictionary(values, nulls) => {
Expand Down Expand Up @@ -1276,7 +1276,7 @@ unsafe fn decode_column(
DataType::LargeUtf8 => Arc::new(decode_string::<i64>(rows, options, validate_utf8)),
DataType::Utf8View => Arc::new(decode_string_view(rows, options, validate_utf8)),
DataType::Dictionary(_, _) => todo!(),
_ => unreachable!()
_ => unimplemented!("unsupported data type: {}", data_type),
}
}
Codec::Dictionary(converter, _) => {
Expand Down Expand Up @@ -2077,6 +2077,19 @@ mod tests {
.collect()
}

/// Builds a random `BinaryViewArray` with `len` entries for the randomized tests.
///
/// Each entry is non-null with probability `valid_percent`. A non-null entry
/// holds between 0 and 99 bytes, each drawn from the range `0..128`.
fn generate_byte_view(len: usize, valid_percent: f64) -> BinaryViewArray {
    let mut rng = thread_rng();
    let mut entries: Vec<Option<Vec<u8>>> = Vec::with_capacity(len);
    for _ in 0..len {
        // Validity is drawn before the payload, so the RNG is consumed in a fixed order per entry.
        let entry = if rng.gen_bool(valid_percent) {
            let size = rng.gen_range(0..100);
            Some((0..size).map(|_| rng.gen_range(0..128)).collect())
        } else {
            None
        };
        entries.push(entry);
    }
    entries.into_iter().collect()
}

fn generate_dictionary<K>(
values: ArrayRef,
len: usize,
Expand Down Expand Up @@ -2157,7 +2170,7 @@ mod tests {

fn generate_column(len: usize) -> ArrayRef {
let mut rng = thread_rng();
match rng.gen_range(0..15) {
match rng.gen_range(0..16) {
0 => Arc::new(generate_primitive_array::<Int32Type>(len, 0.8)),
1 => Arc::new(generate_primitive_array::<UInt32Type>(len, 0.8)),
2 => Arc::new(generate_primitive_array::<Int64Type>(len, 0.8)),
Expand Down Expand Up @@ -2192,6 +2205,7 @@ mod tests {
Arc::new(generate_struct(values_len, 0.8))
})),
14 => Arc::new(generate_string_view(len, 0.8)),
15 => Arc::new(generate_byte_view(len, 0.8)),
_ => unreachable!(),
}
}
Expand Down
8 changes: 7 additions & 1 deletion arrow-row/src/variable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ pub unsafe fn decode_string<I: OffsetSizeTrait>(
GenericStringArray::from(builder.build_unchecked())
}

pub unsafe fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryViewArray {
/// Decodes a binary view array from `rows` with the provided `options`
pub fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> BinaryViewArray {
let decoded: GenericBinaryArray<i64> = decode_binary(rows, options);

// Better performance might be to directly build the binary view instead of building to BinaryArray and then casting
Expand All @@ -278,6 +279,11 @@ pub unsafe fn decode_binary_view(rows: &mut [&[u8]], options: SortOptions) -> Bi
BinaryViewArray::from(&decoded)
}

/// Decodes a string view array from `rows` with the provided `options`
///
/// # Safety
///
/// The row must contain valid UTF-8 data
pub unsafe fn decode_string_view(
rows: &mut [&[u8]],
options: SortOptions,
Expand Down

0 comments on commit e46a741

Please sign in to comment.