Skip to content

Commit

Permalink
perf: Optimize strings slices (#17996)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Aug 1, 2024
1 parent 316d621 commit f4e2a09
Show file tree
Hide file tree
Showing 4 changed files with 200 additions and 100 deletions.
23 changes: 23 additions & 0 deletions crates/polars-arrow/src/array/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,29 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
)
}

/// Build a new array by passing every view through `update_view`.
///
/// The closure receives each current view together with the `T` value decoded from the bytes
/// that view refers to, and returns the view that should replace it. This is useful for
/// operations such as slicing that only need to rewrite views. `self` is left untouched: the
/// work is done on a clone, and the data type, buffers and validity are carried over as they are.
///
/// The closure is invoked for every view; validity is not consulted.
///
/// # Safety
/// Views are read with `get_slice_unchecked` / `from_bytes_unchecked` and the result is
/// assembled with `new_unchecked`, so nothing is validated here. Every view returned by
/// `update_view` must uphold all invariants of the views with respect to this array's buffers.
pub unsafe fn apply_views<F: FnMut(View, &T) -> View>(&self, mut update_view: F) -> Self {
    let (old_views, buffers, validity, total_bytes_len, total_buffer_len) =
        self.clone().into_inner();

    let mut new_views = old_views.make_mut();
    for view in new_views.iter_mut() {
        let bytes = view.get_slice_unchecked(&buffers);
        let value = T::from_bytes_unchecked(bytes);
        *view = update_view(*view, value);
    }

    // NOTE(review): `total_bytes_len` is forwarded unchanged even though `update_view` may
    // return views with different lengths — confirm whether it should be recomputed.
    Self::new_unchecked(
        self.data_type.clone(),
        new_views.into(),
        buffers,
        validity,
        total_bytes_len,
        total_buffer_len,
    )
}

pub fn try_new(
data_type: ArrowDataType,
views: Buffer<View>,
Expand Down
12 changes: 12 additions & 0 deletions crates/polars-core/src/chunked_array/ops/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,3 +573,15 @@ where
});
}
}

impl StringChunked {
    /// Return a copy of this chunked array in which every chunk has had its views rewritten
    /// by `update_view`.
    ///
    /// The closure is handed to each chunk's own `apply_views` by value, which is why `F` must
    /// be `Copy`. `self` is not modified; the chunks of a clone are replaced one by one.
    ///
    /// # Safety
    /// The views produced by `update_view` are not validated. All invariants of the views
    /// apply to every view the closure returns.
    pub unsafe fn apply_views<F>(&self, update_view: F) -> Self
    where
        F: FnMut(View, &str) -> View + Copy,
    {
        let mut updated = self.clone();
        for chunk in updated.downcast_iter_mut() {
            let rewritten = chunk.apply_views(update_view);
            *chunk = rewritten;
        }
        updated
    }
}
4 changes: 2 additions & 2 deletions crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ pub trait StringNameSpaceImpl: AsString {
let ca = self.as_string();
let n = n.strict_cast(&DataType::Int64)?;

Ok(substring::head(ca, n.i64()?))
substring::head(ca, n.i64()?)
}

/// Slice the last `n` values of the string.
Expand All @@ -633,7 +633,7 @@ pub trait StringNameSpaceImpl: AsString {
let ca = self.as_string();
let n = n.strict_cast(&DataType::Int64)?;

Ok(substring::tail(ca, n.i64()?))
substring::tail(ca, n.i64()?)
}
}

Expand Down
Loading

0 comments on commit f4e2a09

Please sign in to comment.