Chunkified, (almost)deserialization-free Mesh/Asset visualizers #316

Open · wants to merge 9 commits into main
4 changes: 4 additions & 0 deletions Cargo.lock
@@ -4302,6 +4302,7 @@ dependencies = [
"ahash",
"anyhow",
"backtrace",
"bytemuck",
"criterion",
"crossbeam",
"document-features",
@@ -4919,7 +4920,9 @@ name = "re_space_view"
version = "0.18.0-alpha.1+dev"
dependencies = [
"ahash",
"bytemuck",
"egui",
"itertools 0.13.0",
"nohash-hasher",
"re_chunk_store",
"re_entity_db",
@@ -4999,6 +5002,7 @@ dependencies = [
"re_log_types",
"re_math",
"re_query",
"re_query2",
"re_renderer",
"re_space_view",
"re_tracing",
1 change: 1 addition & 0 deletions crates/store/re_chunk/Cargo.toml
@@ -55,6 +55,7 @@ arrow2 = { workspace = true, features = [
"compute_filter",
] }
backtrace.workspace = true
bytemuck.workspace = true
document-features.workspace = true
itertools.workspace = true
nohash-hasher.workspace = true
204 changes: 197 additions & 7 deletions crates/store/re_chunk/src/iter.rs
@@ -1,13 +1,17 @@
use std::sync::Arc;

use arrow2::{
array::{Array as ArrowArray, PrimitiveArray},
array::{
Array as ArrowArray, FixedSizeListArray as ArrowFixedSizeListArray,
ListArray as ArrowListArray, PrimitiveArray as ArrowPrimitiveArray,
Utf8Array as ArrowUtf8Array,
},
Either,
};
use itertools::izip;
use itertools::{izip, Itertools};

use re_log_types::{TimeInt, Timeline};
use re_types_core::{Component, ComponentName};
use re_types_core::{ArrowBuffer, ArrowString, Component, ComponentName};

use crate::{Chunk, ChunkTimeline, RowId};

@@ -123,8 +127,12 @@ impl Chunk {
/// Returns an iterator over the raw arrays of a [`Chunk`], for a given component.
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component`].
/// * [`Self::iter_primitive`].
#[inline]
pub fn iter_component_arrays(
&self,
component_name: &ComponentName,
@@ -143,6 +151,10 @@
/// Use this when working with simple arrow datatypes and performance matters (e.g. scalars,
/// points, etc).
///
/// See also:
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
#[inline]
@@ -157,7 +169,7 @@
let Some(values) = list_array
.values()
.as_any()
.downcast_ref::<PrimitiveArray<T>>()
.downcast_ref::<ArrowPrimitiveArray<T>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
@@ -174,6 +186,181 @@
.map(move |(idx, len)| &values[idx..idx + len]),
)
}
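
A minimal usage sketch of the `iter_primitive` fast path described above (illustration only, not part of this diff; the free function, the `chunk` argument, and the component name are assumptions):

fn visit_scalars(chunk: &re_chunk::Chunk) {
    // Assumed component name, for illustration.
    let component_name: re_types_core::ComponentName = "rerun.components.Scalar".into();
    // Each item is a `&[f64]` borrowed straight from the chunk's arrow column,
    // one slice per row, with no per-row deserialization.
    for scalars in chunk.iter_primitive::<f64>(&component_name) {
        for value in scalars {
            let _ = value;
        }
    }
}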

/// Returns an iterator over the raw primitive arrays of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. scalars,
/// points, etc).
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
pub fn iter_primitive_array<const N: usize, T: arrow2::types::NativeType>(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = &[[T; N]]> + '_
where
[T; N]: bytemuck::Pod,
{
let Some(list_array) = self.components.get(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(fixed_size_list_array) = list_array
.values()
.as_any()
.downcast_ref::<ArrowFixedSizeListArray>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let Some(values) = fixed_size_list_array
.values()
.as_any()
.downcast_ref::<ArrowPrimitiveArray<T>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let size = fixed_size_list_array.size();
let values = values.values().as_slice();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| {
bytemuck::cast_slice(&values[idx * size..idx * size + len * size])
}),
)
}
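
A minimal usage sketch of `iter_primitive_array` (illustration only, not part of this diff; the free function, the `chunk` argument, and the component name are assumptions):

fn visit_positions(chunk: &re_chunk::Chunk) {
    // Assumed component name, for illustration.
    let component_name: re_types_core::ComponentName = "rerun.components.Position3D".into();
    // Each item is a `&[[f32; 3]]` view into the chunk's fixed-size-list column, with zero copies.
    for positions in chunk.iter_primitive_array::<3, f32>(&component_name) {
        for &[x, y, z] in positions {
            let _ = (x, y, z);
        }
    }
}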

/// Returns an iterator over the raw strings of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. labels, etc).
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
pub fn iter_string(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = Vec<ArrowString>> + '_ {
let Some(list_array) = self.components.get(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(utf8_array) = list_array
.values()
.as_any()
.downcast_ref::<ArrowUtf8Array<i32>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let values = utf8_array.values();
let offsets = utf8_array.offsets();
let lengths = utf8_array.offsets().lengths().collect_vec();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| {
let offsets = &offsets.as_slice()[idx..idx + len];
let lengths = &lengths.as_slice()[idx..idx + len];
izip!(offsets, lengths)
.map(|(&idx, &len)| ArrowString(values.clone().sliced(idx as _, len)))
.collect_vec()
}),
)
}
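
A minimal usage sketch of `iter_string` (illustration only, not part of this diff; the free function, the `chunk` argument, and the component name are assumptions):

fn visit_labels(chunk: &re_chunk::Chunk) {
    // Assumed component name, for illustration.
    let component_name: re_types_core::ComponentName = "rerun.components.Text".into();
    // Each item is a `Vec<ArrowString>`; every `ArrowString` is a zero-copy slice of the chunk's UTF-8 buffer.
    for labels in chunk.iter_string(&component_name) {
        for label in &labels {
            let _text: &str = label.as_str();
        }
    }
}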

/// Returns an iterator over the raw buffers of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once, and then every
/// component batch will be a slice reference into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. blobs, etc).
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_string`].
/// * [`Self::iter_component_arrays`].
/// * [`Self::iter_component`].
pub fn iter_buffer<T: arrow2::types::NativeType>(
&self,
component_name: &ComponentName,
) -> impl Iterator<Item = Vec<ArrowBuffer<T>>> + '_ {
let Some(list_array) = self.components.get(component_name) else {
return Either::Left(std::iter::empty());
};

let Some(inner_list_array) = list_array
.values()
.as_any()
.downcast_ref::<ArrowListArray<i32>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let Some(values) = inner_list_array
.values()
.as_any()
.downcast_ref::<ArrowPrimitiveArray<T>>()
else {
if cfg!(debug_assertions) {
panic!("downcast failed for {component_name}, data discarded");
} else {
re_log::error_once!("downcast failed for {component_name}, data discarded");
}
return Either::Left(std::iter::empty());
};

let values = values.values();
let offsets = inner_list_array.offsets();
let lengths = inner_list_array.offsets().lengths().collect_vec();

// NOTE: No need for validity checks here, `iter_offsets` already takes care of that.
Either::Right(
self.iter_component_offsets(component_name)
.map(move |(idx, len)| {
let offsets = &offsets.as_slice()[idx..idx + len];
let lengths = &lengths.as_slice()[idx..idx + len];
izip!(offsets, lengths)
.map(|(&idx, &len)| values.clone().sliced(idx as _, len).into())
.collect_vec()
}),
)
}
}
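
A minimal usage sketch of `iter_buffer` (illustration only, not part of this diff; the free function, the `chunk` argument, and the component name are assumptions):

fn visit_blobs(chunk: &re_chunk::Chunk) {
    // Assumed component name, for illustration.
    let component_name: re_types_core::ComponentName = "rerun.components.Blob".into();
    // Each item is a `Vec<ArrowBuffer<u8>>`, i.e. one zero-copy byte buffer per component instance in the row.
    for blobs in chunk.iter_buffer::<u8>(&component_name) {
        for blob in &blobs {
            let _bytes: &[u8] = blob.as_slice();
        }
    }
}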

// ---
@@ -302,8 +489,11 @@ impl Chunk {
/// through enum types across many timestamps).
///
/// See also:
/// * [`Self::iter_component`].
/// * [`Self::iter_primitive`].
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`].
/// * [`Self::iter_component_arrays`].
#[inline]
pub fn iter_component<C: Component>(
&self,
2 changes: 1 addition & 1 deletion crates/store/re_types_core/src/arrow_buffer.rs
@@ -9,7 +9,7 @@ use arrow2::buffer::Buffer;
/// arise from returning a `&[T]` directly, but is significantly more
/// performant than doing the full allocation necessary to return a `Vec<T>`.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct ArrowBuffer<T>(Buffer<T>);
pub struct ArrowBuffer<T>(pub Buffer<T>);

impl<T: crate::SizeBytes> crate::SizeBytes for ArrowBuffer<T> {
#[inline]
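
With the tuple field now public, code outside `re_types_core` can wrap and unwrap the underlying `arrow2` buffer directly. A small sketch of what this enables (illustration only, not part of this diff; the free function is an assumption):

fn wrap_and_unwrap() {
    use arrow2::buffer::Buffer;
    use re_types_core::ArrowBuffer;

    // Wrap an existing arrow2 buffer without copying the data...
    let wrapped: ArrowBuffer<u32> = ArrowBuffer(Buffer::from(vec![1, 2, 3]));
    // ...and reach the raw buffer again when handing it back to arrow2 APIs.
    let inner: &Buffer<u32> = &wrapped.0;
    assert_eq!(inner.as_slice(), &[1, 2, 3]);
}
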
2 changes: 2 additions & 0 deletions crates/viewer/re_space_view/Cargo.toml
@@ -35,5 +35,7 @@ re_viewer_context.workspace = true
re_viewport_blueprint.workspace = true

ahash.workspace = true
bytemuck.workspace = true
egui.workspace = true
itertools.workspace = true
nohash-hasher.workspace = true
2 changes: 1 addition & 1 deletion crates/viewer/re_space_view/src/lib.rs
@@ -24,7 +24,7 @@ pub use query2::{
pub use results_ext::{HybridLatestAtResults, HybridResults, RangeResultsExt};
pub use results_ext2::{
HybridLatestAtResults as HybridLatestAtResults2, HybridResults as HybridResults2,
RangeResultsExt as RangeResultsExt2,
HybridResultsChunkIter, RangeResultsExt as RangeResultsExt2,
};
pub use screenshot::ScreenshotMode;
pub use view_property_ui::view_property_ui;