Skip to content

Commit

Permalink
perf: Zerocopy buffers for FixedSizeBinary to BinaryView cast (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Aug 5, 2024
1 parent 06e4fa4 commit fd00ee6
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 15 deletions.
55 changes: 42 additions & 13 deletions crates/polars-arrow/src/array/binview/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ impl View {
unsafe { std::mem::transmute(self) }
}

/// Create a new inline view
/// Create a new inline view without verifying the length
///
/// # Panics
/// # Safety
///
/// Panics if the `bytes.len() > View::MAX_INLINE_SIZE`.
/// It needs to hold that `bytes.len() <= View::MAX_INLINE_SIZE`.
#[inline]
pub fn new_inline(bytes: &[u8]) -> Self {
pub unsafe fn new_inline_unchecked(bytes: &[u8]) -> Self {
debug_assert!(bytes.len() <= u32::MAX as usize);
assert!(bytes.len() as u32 <= Self::MAX_INLINE_SIZE);
debug_assert!(bytes.len() as u32 <= Self::MAX_INLINE_SIZE);

let mut view = Self {
length: bytes.len() as u32,
Expand All @@ -92,18 +92,47 @@ impl View {
view
}

/// Create a new inline view
///
/// # Panics
///
/// Panics if the `bytes.len() > View::MAX_INLINE_SIZE`.
#[inline]
pub fn new_inline(bytes: &[u8]) -> Self {
assert!(bytes.len() as u32 <= Self::MAX_INLINE_SIZE);
unsafe { Self::new_inline_unchecked(bytes) }
}

/// Create a new inline view
///
/// # Safety
///
/// It needs to hold that `bytes.len() > View::MAX_INLINE_SIZE`.
#[inline]
pub unsafe fn new_noninline_unchecked(bytes: &[u8], buffer_idx: u32, offset: u32) -> Self {
debug_assert!(bytes.len() <= u32::MAX as usize);
debug_assert!(bytes.len() as u32 > View::MAX_INLINE_SIZE);

// SAFETY: The invariant of this function guarantees that this is safe.
let prefix = unsafe { u32::from_le_bytes(bytes[0..4].try_into().unwrap_unchecked()) };
Self {
length: bytes.len() as u32,
prefix,
buffer_idx,
offset,
}
}

#[inline]
pub fn new_from_bytes(bytes: &[u8], buffer_idx: u32, offset: u32) -> Self {
debug_assert!(bytes.len() <= u32::MAX as usize);

if bytes.len() as u32 <= Self::MAX_INLINE_SIZE {
Self::new_inline(bytes)
} else {
Self {
length: bytes.len() as u32,
prefix: u32::from_le_bytes(bytes[0..4].try_into().unwrap()),
buffer_idx,
offset,
// SAFETY: We verify the invariant with the outer if statement
unsafe {
if bytes.len() as u32 <= Self::MAX_INLINE_SIZE {
Self::new_inline_unchecked(bytes)
} else {
Self::new_noninline_unchecked(bytes, buffer_idx, offset)
}
}
}
Expand Down
65 changes: 63 additions & 2 deletions crates/polars-arrow/src/compute/cast/binary_to.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use std::sync::Arc;

use polars_error::PolarsResult;

use super::CastOptionsImpl;
use crate::array::*;
use crate::buffer::Buffer;
use crate::datatypes::ArrowDataType;
use crate::offset::{Offset, Offsets};
use crate::types::NativeType;
Expand Down Expand Up @@ -179,8 +182,66 @@ pub fn fixed_size_binary_binary<O: Offset>(
}

pub fn fixed_size_binary_to_binview(from: &FixedSizeBinaryArray) -> BinaryViewArray {
let mutable = MutableBinaryViewArray::from_values_iter(from.values_iter());
mutable.freeze().with_validity(from.validity().cloned())
let datatype = <[u8] as ViewType>::DATA_TYPE;

// Fast path: all the views are inlineable
if from.size() <= View::MAX_INLINE_SIZE as usize {
// @NOTE: There is something with the code-generation of `View::new_inline_unchecked` that
// prevents it from properly SIMD-ing this loop. It insists on memcpying while it should
// know that the size is really small. Dispatching over the `from.size()` and making it
// constant does make loop SIMD, but it does not actually speed anything up and the code it
// generates is still horrible.
//
// This is really slow, and I don't think it has to be.

// SAFETY: We checked that slice.len() <= View::MAX_INLINE_SIZE before
let views = from
.values_iter()
.map(|slice| unsafe { View::new_inline_unchecked(slice) })
.collect::<Buffer<_>>();
return BinaryViewArray::try_new(datatype, views, Arc::default(), from.validity().cloned())
.unwrap();
}

const MAX_BYTES_PER_BUFFER: usize = u32::MAX as usize;

let size = from.size();
let num_bytes = from.len() * size;
let num_buffers = num_bytes.div_ceil(MAX_BYTES_PER_BUFFER);
assert!(num_buffers < u32::MAX as usize);

let num_elements_per_buffer = MAX_BYTES_PER_BUFFER / size;
// This is NOT equal to MAX_BYTES_PER_BUFFER because of integer division
let split_point = num_elements_per_buffer * size;

// This is zero-copy for the buffer since split just increases the the data since
let mut buffer = from.values().clone();
let mut buffers = Vec::with_capacity(num_buffers);
for _ in 0..num_buffers - 1 {
let slice;
(slice, buffer) = buffer.split_at(split_point);
buffers.push(slice);
}
buffers.push(buffer);

let mut iter = from.values_iter();
let iter = iter.by_ref();
let mut views = Vec::with_capacity(from.len());
for buffer_idx in 0..num_buffers {
views.extend(
iter.take(num_elements_per_buffer)
.enumerate()
.map(|(i, slice)| {
// SAFETY: We checked that slice.len() > View::MAX_INLINE_SIZE before
unsafe {
View::new_noninline_unchecked(slice, buffer_idx as u32, (i * size) as u32)
}
}),
);
}
let views = views.into();

BinaryViewArray::try_new(datatype, views, buffers.into(), from.validity().cloned()).unwrap()
}

/// Conversion of binary
Expand Down

0 comments on commit fd00ee6

Please sign in to comment.