Skip to content

Commit

Permalink
Add Array::shrink_to_fit
Browse files Browse the repository at this point in the history
  • Loading branch information
emilk committed Nov 25, 2024
1 parent 43700fd commit 53d3eef
Show file tree
Hide file tree
Showing 15 changed files with 178 additions and 17 deletions.
7 changes: 7 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ impl Array for BooleanArray {
self.values.is_empty()
}

/// Builds a new `BooleanArray` whose value and null buffers have each been
/// passed through their own `shrink_to_fit`, and returns it as an [`ArrayRef`].
fn shrink_to_fit(&self) -> ArrayRef {
    // The buffer-level `shrink_to_fit` consumes its receiver, hence the clones.
    let values = self.values.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    Arc::new(Self { values, nulls })
}

/// Forwards to the offset reported by `self.values`.
fn offset(&self) -> usize {
self.values.offset()
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,15 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
self.value_offsets.len() <= 1
}

/// Returns a new array of the same data type whose offsets, value data and
/// null buffers have each been passed through their own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    // The buffer-level `shrink_to_fit` consumes its receiver, hence the clones.
    let value_offsets = self.value_offsets.clone().shrink_to_fit();
    let value_data = self.value_data.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    Arc::new(Self {
        data_type,
        value_offsets,
        value_data,
        nulls,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
48 changes: 31 additions & 17 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,31 +430,31 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
///
/// Before GC:
/// ```text
///                                        ┌──────┐
///                                        │......│
///                                        │......│
/// ┌────────────────────┐       ┌ ─ ─ ─ ▶ │Data1 │   Large buffer
/// │       View 1       │─ ─ ─ ─          │......│  with data that
/// ├────────────────────┤                 │......│  is not referred
/// │       View 2       │─ ─ ─ ─ ─ ─ ─ ─▶ │Data2 │ to by View 1 or
/// └────────────────────┘                 │......│      View 2
///                                        │......│
///    2 views, refer to                   │......│
///   small portions of a                  └──────┘
///      large buffer
/// ```
///
///
/// After GC:
///
/// ```text
/// ┌────────────────────┐                 ┌─────┐    After gc, only
/// │       View 1       │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│     data that is
/// ├────────────────────┤       ┌ ─ ─ ─ ▶ │Data2│    pointed to by
/// │       View 2       │─ ─ ─ ─          └─────┘     the views is
/// └────────────────────┘                                 left
///
///
///         2 views
/// ```
/// This method will compact the data buffers by recreating the view array and only include the data
/// that is pointed to by the views.
Expand Down Expand Up @@ -575,6 +575,20 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
self.views.is_empty()
}

/// Returns a new view array whose views buffer, every data buffer, and the
/// null buffer have each been passed through their own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let views = self.views.clone().shrink_to_fit();
    // Each data buffer is cloned and shrunk individually; order is preserved.
    let buffers = self
        .buffers
        .iter()
        .cloned()
        .map(|buf| buf.shrink_to_fit())
        .collect();
    let phantom = self.phantom;
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    Arc::new(Self {
        data_type,
        views,
        buffers,
        phantom,
        nulls,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
19 changes: 19 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,21 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
self.keys.is_empty()
}

/// Returns a new dictionary array built from the shrunk keys and the shrunk
/// values, keeping the same data type and `is_ordered` flag.
///
/// # Panics
///
/// Panics if shrinking the keys yields something other than a
/// `PrimitiveArray<T>`, which would be a broken internal invariant.
fn shrink_to_fit(&self) -> ArrayRef {
    // `Array::shrink_to_fit` returns a type-erased `ArrayRef`, so the concrete
    // key array has to be recovered through a downcast.
    let shrunk_keys = self.keys.shrink_to_fit();
    let keys = shrunk_keys
        .as_any()
        .downcast_ref::<PrimitiveArray<T>>()
        .expect("shrinking a PrimitiveArray<T> must yield a PrimitiveArray<T>")
        .clone();
    Arc::new(Self {
        data_type: self.data_type.clone(),
        keys,
        values: self.values.shrink_to_fit(),
        is_ordered: self.is_ordered,
    })
}

/// Forwards to the offset reported by the key array.
fn offset(&self) -> usize {
self.keys.offset()
}
Expand Down Expand Up @@ -874,6 +889,10 @@ impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V
self.dictionary.is_empty()
}

/// Shrinks the wrapped dictionary array and returns the result.
///
/// The trait method returns a type-erased [`ArrayRef`] rather than `Self`,
/// so this forwards to the underlying dictionary instead of panicking, in
/// the same way `is_empty` and `offset` forward to it.
fn shrink_to_fit(&self) -> ArrayRef {
    self.dictionary.shrink_to_fit()
}

/// Forwards to the offset reported by the wrapped dictionary.
fn offset(&self) -> usize {
self.dictionary.offset()
}
Expand Down
10 changes: 10 additions & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,16 @@ impl Array for FixedSizeBinaryArray {
self.len == 0
}

/// Returns a new array with the same data type, length and value length,
/// whose value data and null buffers have each been passed through their
/// own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let value_data = self.value_data.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    Arc::new(Self {
        data_type,
        value_data,
        nulls,
        len: self.len,
        value_length: self.value_length,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
10 changes: 10 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,16 @@ impl Array for FixedSizeListArray {
self.len == 0
}

/// Returns a new list array with the same data type, value length and
/// length, built from the shrunk child values and the shrunk null buffer.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    // The child array is shrunk through its own `shrink_to_fit`.
    let values = self.values.shrink_to_fit();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    Arc::new(Self {
        data_type,
        values,
        nulls,
        value_length: self.value_length,
        len: self.len,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,15 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
self.value_offsets.len() <= 1
}

/// Returns a new list array of the same data type, built from the shrunk
/// null buffer, the shrunk child values and the shrunk offsets.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    // The child array is shrunk through its own `shrink_to_fit`.
    let values = self.values.shrink_to_fit();
    let value_offsets = self.value_offsets.clone().shrink_to_fit();
    Arc::new(Self {
        data_type,
        nulls,
        values,
        value_offsets,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
10 changes: 10 additions & 0 deletions arrow-array/src/array/list_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,16 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
self.value_sizes.is_empty()
}

/// Returns a new list-view array of the same data type, built from the
/// shrunk null buffer, child values, offsets and sizes.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    // The child array is shrunk through its own `shrink_to_fit`.
    let values = self.values.shrink_to_fit();
    let value_offsets = self.value_offsets.clone().shrink_to_fit();
    let value_sizes = self.value_sizes.clone().shrink_to_fit();
    Arc::new(Self {
        data_type,
        nulls,
        values,
        value_offsets,
        value_sizes,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
16 changes: 16 additions & 0 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,22 @@ impl Array for MapArray {
self.value_offsets.len() <= 1
}

/// Returns a new map array of the same data type, built from the shrunk
/// null buffer, the shrunk entries and the shrunk offsets.
///
/// # Panics
///
/// Panics if shrinking the entries yields something other than a
/// `StructArray`, which would be a broken internal invariant.
fn shrink_to_fit(&self) -> ArrayRef {
    // `shrink_to_fit` only borrows the entries, so no clone is needed before
    // the call. The result is type-erased, so the concrete struct array is
    // recovered through a downcast.
    let shrunk_entries = self.entries.shrink_to_fit();
    let entries = shrunk_entries
        .as_any()
        .downcast_ref::<StructArray>()
        .expect("shrinking a StructArray must yield a StructArray")
        .clone();
    Arc::new(Self {
        data_type: self.data_type.clone(),
        nulls: self.nulls.clone().map(|mask| mask.shrink_to_fit()),
        entries,
        value_offsets: self.value_offsets.clone().shrink_to_fit(),
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
11 changes: 11 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// ```
fn is_empty(&self) -> bool;

/// Returns an array with the same contents from which unused memory has been freed.
///
/// The method only borrows `self` and hands the result back as a new
/// [`ArrayRef`]; it is marked `#[must_use]` because discarding that return
/// value discards the shrunk array.
///
/// NOTE(review): whether the returned array can still share allocations with
/// `self` depends on the buffer-level implementation — confirm.
#[must_use]
fn shrink_to_fit(&self) -> ArrayRef;

/// Returns the offset into the underlying data used by this array(-slice).
/// Note that the underlying data can be shared by many arrays.
/// This defaults to `0`.
Expand Down Expand Up @@ -365,6 +369,10 @@ impl Array for ArrayRef {
self.as_ref().is_empty()
}

/// Forwards to the `shrink_to_fit` of the array behind this reference.
fn shrink_to_fit(&self) -> ArrayRef {
    let inner = self.as_ref();
    inner.shrink_to_fit()
}

/// Forwards to the offset reported by the array behind this reference.
fn offset(&self) -> usize {
self.as_ref().offset()
}
Expand Down Expand Up @@ -434,6 +442,9 @@ impl<T: Array> Array for &T {
fn is_empty(&self) -> bool {
T::is_empty(self)
}
fn shrink_to_fit(&self) -> ArrayRef {
Arc::new(T::shrink_to_fit(self))
}

fn offset(&self) -> usize {
T::offset(self)
Expand Down
4 changes: 4 additions & 0 deletions arrow-array/src/array/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ impl Array for NullArray {
self.len == 0
}

/// Returns a new `NullArray` of the same length; only `len` is carried over.
fn shrink_to_fit(&self) -> ArrayRef {
    let len = self.len;
    Arc::new(Self { len })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,14 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
self.values.is_empty()
}

/// Returns a new primitive array of the same data type whose value and null
/// buffers have each been passed through their own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    // The buffer-level `shrink_to_fit` consumes its receiver, hence the clones.
    let values = self.values.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    Arc::new(Self {
        data_type,
        values,
        nulls,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
12 changes: 12 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,14 @@ impl<T: RunEndIndexType> Array for RunArray<T> {
self.run_ends.is_empty()
}

/// Returns a new run array of the same data type, built from the shrunk
/// run ends and the shrunk child values.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let run_ends = self.run_ends.clone().shrink_to_fit();
    // The child array is shrunk through its own `shrink_to_fit`.
    let values = self.values.shrink_to_fit();
    Arc::new(Self {
        data_type,
        run_ends,
        values,
    })
}

/// Forwards to the offset reported by `self.run_ends`.
fn offset(&self) -> usize {
self.run_ends.offset()
}
Expand Down Expand Up @@ -584,6 +592,10 @@ impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
self.run_array.is_empty()
}

/// Shrinks the wrapped run array and returns the result.
///
/// The trait method returns a type-erased [`ArrayRef`] rather than `Self`,
/// so this forwards to the underlying run array instead of panicking, in
/// the same way `is_empty` and `offset` forward to it.
fn shrink_to_fit(&self) -> ArrayRef {
    self.run_array.shrink_to_fit()
}

/// Forwards to the offset reported by the wrapped run array.
fn offset(&self) -> usize {
self.run_array.offset()
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,15 @@ impl Array for StructArray {
self.len == 0
}

/// Returns a new struct array with the same length and data type, built
/// from the shrunk null buffer and every child field shrunk in turn.
fn shrink_to_fit(&self) -> ArrayRef {
    let len = self.len;
    let data_type = self.data_type.clone();
    let nulls = self.nulls.clone().map(|mask| mask.shrink_to_fit());
    // Each child column is shrunk through its own `shrink_to_fit`; order is preserved.
    let fields = self
        .fields
        .iter()
        .map(|column| column.shrink_to_fit())
        .collect();
    Arc::new(Self {
        len,
        data_type,
        nulls,
        fields,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
13 changes: 13 additions & 0 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,19 @@ impl Array for UnionArray {
self.type_ids.is_empty()
}

/// Returns a new union array of the same data type, built from the shrunk
/// type ids, the shrunk offsets (when present) and every present child shrunk
/// in turn.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let type_ids = self.type_ids.clone().shrink_to_fit();
    let offsets = self.offsets.clone().map(|buf| buf.shrink_to_fit());
    // Empty slots stay `None`; occupied slots hold the shrunk child.
    let fields = self
        .fields
        .iter()
        .map(|slot| slot.as_ref().map(|child| child.shrink_to_fit()))
        .collect();
    Arc::new(Self {
        data_type,
        type_ids,
        offsets,
        fields,
    })
}

/// Always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down

0 comments on commit 53d3eef

Please sign in to comment.