Skip to content

Commit

Permalink
feat: add ListView equal
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikkon committed Jan 12, 2025
1 parent b77d38d commit d75ebb5
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 6 deletions.
73 changes: 73 additions & 0 deletions arrow-data/src/equal/list_view.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::ArrayData;
use arrow_buffer::ArrowNativeType;
use num::Integer;

use super::equal_range;

/// Compares `len` list-view slots of `lhs`, starting at `lhs_start`, with `len`
/// slots of `rhs`, starting at `rhs_start`.
///
/// Buffer 0 of each array is read as the per-slot offsets and buffer 1 as the
/// per-slot sizes, both typed as `T`. Each `(offset, size)` pair selects a range
/// of the first child array, and those ranges are compared with [`equal_range`].
///
/// Two slots are considered equal when either:
/// * both are null (their offsets and sizes are then ignored), or
/// * both are valid, have the same size, and their child ranges are equal.
///
/// An array without a null buffer is treated as having only valid slots.
///
/// # Panics
///
/// Panics if a position falls outside the offsets/sizes buffers, or if the
/// offset or size of a valid slot cannot be converted to `usize`.
pub(super) fn list_view_equal<T: ArrowNativeType + Integer>(
    lhs: &ArrayData,
    rhs: &ArrayData,
    lhs_start: usize,
    rhs_start: usize,
    len: usize,
) -> bool {
    let lhs_offsets = lhs.buffer::<T>(0);
    let rhs_offsets = rhs.buffer::<T>(0);
    let lhs_sizes = lhs.buffer::<T>(1);
    let rhs_sizes = rhs.buffer::<T>(1);
    let lhs_nulls = lhs.nulls();
    let rhs_nulls = rhs.nulls();

    (0..len).all(|i| {
        let lhs_pos = lhs_start + i;
        let rhs_pos = rhs_start + i;

        // Decide validity first: the offset and size stored for a null slot
        // carry no meaning, so they must not influence the result (or be
        // unwrapped at all).
        let lhs_is_null = lhs_nulls.is_some_and(|nulls| nulls.is_null(lhs_pos));
        let rhs_is_null = rhs_nulls.is_some_and(|nulls| nulls.is_null(rhs_pos));
        if lhs_is_null || rhs_is_null {
            // Equal only when both slots are null.
            return lhs_is_null == rhs_is_null;
        }

        // Both slots are valid: sizes must match before comparing the values.
        let lhs_size = lhs_sizes[lhs_pos].to_usize().unwrap();
        let rhs_size = rhs_sizes[rhs_pos].to_usize().unwrap();
        if lhs_size != rhs_size {
            return false;
        }

        let lhs_offset_start = lhs_offsets[lhs_pos].to_usize().unwrap();
        let rhs_offset_start = rhs_offsets[rhs_pos].to_usize().unwrap();

        // Compare the child values addressed by the two views.
        equal_range(
            &lhs.child_data()[0],
            &rhs.child_data()[0],
            lhs_offset_start,
            rhs_offset_start,
            lhs_size,
        )
    })
}
7 changes: 4 additions & 3 deletions arrow-data/src/equal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ mod dictionary;
mod fixed_binary;
mod fixed_list;
mod list;
mod list_view;
mod null;
mod primitive;
mod run;
Expand All @@ -47,6 +48,7 @@ use dictionary::dictionary_equal;
use fixed_binary::fixed_binary_equal;
use fixed_list::fixed_list_equal;
use list::list_equal;
use list_view::list_view_equal;
use null::null_equal;
use primitive::primitive_equal;
use structure::struct_equal;
Expand Down Expand Up @@ -102,9 +104,8 @@ fn equal_values(
byte_view_equal(lhs, rhs, lhs_start, rhs_start, len)
}
DataType::List(_) => list_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
DataType::ListView(_) | DataType::LargeListView(_) => {
unimplemented!("ListView/LargeListView not yet implemented")
}
DataType::ListView(_) => list_view_equal::<i32>(lhs, rhs, lhs_start, rhs_start, len),
DataType::LargeListView(_) => list_view_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::LargeList(_) => list_equal::<i64>(lhs, rhs, lhs_start, rhs_start, len),
DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len),
DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len),
Expand Down
139 changes: 136 additions & 3 deletions arrow/tests/array_equal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
use arrow::array::{
make_array, Array, ArrayRef, BooleanArray, Decimal128Array, FixedSizeBinaryArray,
FixedSizeBinaryBuilder, FixedSizeListBuilder, GenericBinaryArray, GenericStringArray,
Int32Array, Int32Builder, Int64Builder, ListArray, ListBuilder, NullArray, OffsetSizeTrait,
StringArray, StringDictionaryBuilder, StructArray, UnionBuilder,
Int32Array, Int32Builder, Int64Builder, ListArray, ListBuilder, ListViewBuilder, NullArray,
OffsetSizeTrait, StringArray, StringDictionaryBuilder, StructArray, UnionBuilder,
};
use arrow::datatypes::{Int16Type, Int32Type};
use arrow_array::builder::{StringBuilder, StringViewBuilder, StructBuilder};
use arrow_array::{DictionaryArray, FixedSizeListArray, StringViewArray};
use arrow_array::{DictionaryArray, FixedSizeListArray, ListViewArray, StringViewArray};
use arrow_buffer::{Buffer, ToByteSlice};
use arrow_data::{ArrayData, ArrayDataBuilder};
use arrow_schema::{DataType, Field, Fields};
Expand Down Expand Up @@ -1292,3 +1292,136 @@ fn test_list_excess_children_equal() {
assert_eq!(b.value_offsets(), &[0, 0, 2]);
assert_eq!(a, b);
}

/// Builds a `ListViewArray` of `i32` values from a sequence of optional slices.
///
/// Each `Some(values)` entry appends `values` to the child builder and then
/// appends a slot with `true`; each `None` entry appends a slot with `false`
/// without adding any child values.
fn create_list_view_array<U: AsRef<[i32]>, T: AsRef<[Option<U>]>>(data: T) -> ListViewArray {
    let mut list_builder = ListViewBuilder::new(Int32Builder::with_capacity(10));
    for entry in data.as_ref().iter() {
        match entry {
            Some(items) => {
                list_builder.values().append_slice(items.as_ref());
                list_builder.append(true);
            }
            None => {
                list_builder.append(false);
            }
        }
    }
    list_builder.finish()
}

/// Two list views built from the same data compare equal; changing a single
/// child value makes them unequal.
#[test]
fn test_list_view_equal() {
    let base = create_list_view_array([Some(&[1, 2, 3]), Some(&[4, 5, 6])]);

    let identical = create_list_view_array([Some(&[1, 2, 3]), Some(&[4, 5, 6])]);
    test_equal(&base, &identical, true);

    // Only the last value of the second list differs (6 vs 7).
    let last_value_differs = create_list_view_array([Some(&[1, 2, 3]), Some(&[4, 5, 7])]);
    test_equal(&base, &last_value_differs, false);
}

/// Two zero-length list views whose offsets, sizes and null buffers are all
/// empty compare equal.
#[test]
fn test_empty_offsets_list_view_equal() {
    let values = Int32Array::from(Vec::<i32>::new());
    let no_bytes: [u8; 0] = [];

    // Both arrays are built from exactly the same (empty) ingredients.
    let build_empty = || -> ListViewArray {
        let data_type =
            DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, true)));
        ArrayDataBuilder::new(data_type)
            .len(0)
            .add_buffer(Buffer::from(&no_bytes))
            .add_buffer(Buffer::from(&no_bytes))
            .add_child_data(values.to_data())
            .null_bit_buffer(Some(Buffer::from(&no_bytes)))
            .build()
            .unwrap()
            .into()
    };

    let a = build_empty();
    let b = build_empty();

    test_equal(&a, &b, true);
}

// Test the case where null_count > 0
//
// Covers four situations:
//   1. identical arrays with null slots            -> equal
//   2. a slot that is null on one side only        -> not equal
//   3. same null layout, one child value differs   -> not equal
//   4. hand-built arrays whose child values differ only in ranges that are
//      referenced exclusively by slots without a set validity bit -> equal
#[test]
fn test_list_view_null() {
let a = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
let b = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
test_equal(&a, &b, true);

// Slot 2 is `Some(&[5, 6])` here but `None` in `a`.
let b = create_list_view_array([
Some(&[1, 2]),
None,
Some(&[5, 6]),
Some(&[3, 4]),
None,
None,
]);
test_equal(&a, &b, false);

// Same null layout as `a`, but slot 3 holds [3, 5] instead of [3, 4].
let b = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);
test_equal(&a, &b, false);

// a list where the nullness of values is determined by the list's bitmap
//
// Only bits 0 and 3 of the bitmap below are set (presumably meaning "valid",
// per the Arrow validity-bitmap convention). Slot 0 is (offset 0, size 3) and
// slot 3 is (offset 4, size 2), so those two slots reference child indices
// 0..3 and 4..6. `c_values` and `d_values` agree on exactly those indices
// (1, 2, -1 and 3, 4) and differ only at child indices 3, 6 and 7, where
// `d_values` holds `None`.
let c_values = Int32Array::from(vec![1, 2, -1, -2, 3, 4, -3, -4]);
let c: ListViewArray = ArrayDataBuilder::new(DataType::ListView(Arc::new(
Field::new_list_field(DataType::Int32, true),
)))
.len(8)
.add_buffer(Buffer::from([0i32, 2, 3, 4, 4, 1, 4, 4].to_byte_slice()))
.add_buffer(Buffer::from([3i32, 2, 1, 2, 1, 1, 1, 1].to_byte_slice()))
.add_child_data(c_values.into_data())
.null_bit_buffer(Some(Buffer::from([0b0001001])))
.build()
.unwrap()
.into();

let d_values = Int32Array::from(vec![
Some(1),
Some(2),
Some(-1),
None,
Some(3),
Some(4),
None,
None,
]);
// Same offsets, sizes and bitmap as `c`; only the child array differs.
let d: ListViewArray = ArrayDataBuilder::new(DataType::ListView(Arc::new(
Field::new_list_field(DataType::Int32, true),
)))
.len(8)
.add_buffer(Buffer::from([0i32, 2, 3, 4, 4, 1, 4, 4].to_byte_slice()))
.add_buffer(Buffer::from([3i32, 2, 1, 2, 1, 1, 1, 1].to_byte_slice()))
.add_child_data(d_values.into_data())
.null_bit_buffer(Some(Buffer::from([0b0001001])))
.build()
.unwrap()
.into();
test_equal(&c, &d, true);
}

// Test equality of sliced list views, including a slice with offset != 0.
//
// `a` and `b` differ only in slot 3 ([3, 4] vs [3, 5]), so a slice compares
// equal exactly when it does not include that slot.
#[test]
fn test_list_view_offsets() {
    let a = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 4]), None, None]);
    let b = create_list_view_array([Some(&[1, 2]), None, None, Some(&[3, 5]), None, None]);

    // (slice offset, slice length, expected equality)
    let cases = [(0, 3, true), (0, 5, false), (4, 1, true)];
    for (offset, length, expected) in cases {
        let a_slice = a.slice(offset, length);
        let b_slice = b.slice(offset, length);
        test_equal(&a_slice, &b_slice, expected);
    }
}

0 comments on commit d75ebb5

Please sign in to comment.