Skip to content

Commit

Permalink
Port codegen of arrow datatype to arrow1
Browse files: browse the repository at this point in the history
  • Loading branch information
emilk committed Nov 22, 2024
1 parent 325ee0b commit f9ba68b
Show file tree
Hide file tree
Showing 198 changed files with 1,669 additions and 1,341 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6264,6 +6264,7 @@ version = "0.21.0-alpha.1+dev"
dependencies = [
"anyhow",
"array-init",
"arrow",
"bytemuck",
"document-features",
"ecolor",
Expand Down
16 changes: 8 additions & 8 deletions crates/build/re_types_builder/src/codegen/cpp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::collections::HashSet;

use camino::{Utf8Path, Utf8PathBuf};
use itertools::Itertools;
use proc_macro2::{Ident, TokenStream};
use proc_macro2::{Ident, Literal, TokenStream};
use quote::{format_ident, quote};
use rayon::prelude::*;

Expand Down Expand Up @@ -1241,7 +1241,7 @@ impl QuotedObject {
let field_name = field_name_identifier(obj_field);

// We assign the arrow type index to the enum fields to make encoding simpler and faster:
let arrow_type_index = proc_macro2::Literal::usize_unsuffixed(enum_value as _);
let arrow_type_index = Literal::usize_unsuffixed(enum_value as _);

quote! {
#NEWLINE_TOKEN
Expand Down Expand Up @@ -1417,7 +1417,7 @@ fn arrow_data_type_method(
cpp_includes.insert_system("arrow/type_fwd.h");
hpp_declarations.insert("arrow", ForwardDecl::Class(format_ident!("DataType")));

let quoted_datatype = quote_arrow_data_type(
let quoted_datatype = quote_arrow_datatype(
&Type::Object(obj.fqname.clone()),
objects,
cpp_includes,
Expand Down Expand Up @@ -2234,7 +2234,7 @@ fn quote_field_type(includes: &mut Includes, obj_field: &ObjectField) -> TokenSt
Type::Array { elem_type, length } => {
includes.insert_system("array");
let elem_type = quote_element_type(includes, elem_type);
let length = proc_macro2::Literal::usize_unsuffixed(*length);
let length = Literal::usize_unsuffixed(*length);
quote! { std::array<#elem_type, #length> }
}
Type::Vector { elem_type } => {
Expand Down Expand Up @@ -2401,7 +2401,7 @@ fn quote_integer<T: std::fmt::Display>(t: T) -> TokenStream {
quote!(#t)
}

fn quote_arrow_data_type(
fn quote_arrow_datatype(
typ: &Type,
objects: &Objects,
includes: &mut Includes,
Expand Down Expand Up @@ -2444,7 +2444,7 @@ fn quote_arrow_data_type(
let quoted_fqname = quote_fqname_as_type_path(includes, fqname);
quote!(Loggable<#quoted_fqname>::arrow_datatype())
} else if obj.is_arrow_transparent() {
quote_arrow_data_type(&obj.fields[0].typ, objects, includes, false)
quote_arrow_datatype(&obj.fields[0].typ, objects, includes, false)
} else {
let quoted_fields = obj
.fields
Expand Down Expand Up @@ -2480,7 +2480,7 @@ fn quote_arrow_field_type(
includes: &mut Includes,
) -> TokenStream {
let name = &field.name;
let datatype = quote_arrow_data_type(&field.typ, objects, includes, false);
let datatype = quote_arrow_datatype(&field.typ, objects, includes, false);
let is_nullable = field.is_nullable || field.typ == Type::Unit; // null type is always nullable

quote! {
Expand All @@ -2494,7 +2494,7 @@ fn quote_arrow_elem_type(
includes: &mut Includes,
) -> TokenStream {
let typ: Type = elem_type.clone().into();
let datatype = quote_arrow_data_type(&typ, objects, includes, false);
let datatype = quote_arrow_datatype(&typ, objects, includes, false);
let is_nullable = typ == Type::Unit; // null type must be nullable
quote! {
arrow::field("item", #datatype, #is_nullable)
Expand Down
17 changes: 10 additions & 7 deletions crates/build/re_types_builder/src/codegen/rust/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -864,17 +864,17 @@ fn quote_trait_impls_for_datatype_or_component(
let quoted_arrow_datatype = if let Some(forwarded_type) = forwarded_type.as_ref() {
quote! {
#[inline]
fn arrow2_datatype() -> arrow2::datatypes::DataType {
#forwarded_type::arrow2_datatype()
fn arrow_datatype() -> arrow::datatypes::DataType {
#forwarded_type::arrow_datatype()
}
}
} else {
let datatype = ArrowDataTypeTokenizer(&datatype, false);
quote! {
#[inline]
fn arrow2_datatype() -> arrow2::datatypes::DataType {
fn arrow_datatype() -> arrow::datatypes::DataType {
#![allow(clippy::wildcard_imports)]
use arrow2::datatypes::*;
use arrow::datatypes::*;
#datatype
}
}
Expand Down Expand Up @@ -906,7 +906,8 @@ fn quote_trait_impls_for_datatype_or_component(
// re_tracing::profile_function!();

#![allow(clippy::wildcard_imports)]
use arrow2::{datatypes::*, array::*, buffer::*};
use arrow::datatypes::*;
use arrow2::{ array::*, buffer::*};
use ::re_types_core::{Loggable as _, ResultExt as _};

// This code-path cannot have null fields. If it does have a validity mask
Expand Down Expand Up @@ -948,7 +949,8 @@ fn quote_trait_impls_for_datatype_or_component(
// re_tracing::profile_function!();

#![allow(clippy::wildcard_imports)]
use arrow2::{datatypes::*, array::*, buffer::*};
use arrow::datatypes::*;
use arrow2::{ array::*, buffer::*};
use ::re_types_core::{Loggable as _, ResultExt as _};
Ok(#quoted_deserializer)
}
Expand Down Expand Up @@ -987,7 +989,8 @@ fn quote_trait_impls_for_datatype_or_component(

#![allow(clippy::wildcard_imports)]
#![allow(clippy::manual_is_variant_and)]
use arrow2::{datatypes::*, array::*};
use arrow::datatypes::*;
use arrow2::array::*;
use ::re_types_core::{Loggable as _, ResultExt as _};

Ok(#quoted_serializer)
Expand Down
20 changes: 14 additions & 6 deletions crates/build/re_types_builder/src/codegen/rust/arrow.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arrow2::datatypes::DataType;
use proc_macro2::TokenStream;
use proc_macro2::{Literal, TokenStream};
use quote::quote;

// ---
Expand Down Expand Up @@ -37,6 +37,7 @@ impl quote::ToTokens for ArrowDataTypeTokenizer<'_> {

DataType::FixedSizeList(field, length) => {
let field = ArrowFieldTokenizer(field);
let length = Literal::usize_unsuffixed(*length);
quote!(DataType::FixedSizeList(std::sync::Arc::new(#field), #length))
}

Expand All @@ -47,25 +48,32 @@ impl quote::ToTokens for ArrowDataTypeTokenizer<'_> {
UnionMode::Sparse => quote!(UnionMode::Sparse),
};
if let Some(types) = types {
let types = types.iter().map(|&t| {
Literal::i8_unsuffixed(i8::try_from(t).unwrap_or_else(|_| {
panic!("Expect union type tag to be in 0-127; got {t}")
}))
});
quote!(DataType::Union(
std::sync::Arc::new(vec![ #(#fields,)* ]),
Some(std::sync::Arc::new(vec![ #(#types,)* ])),
UnionFields::new(
vec![ #(#types,)* ],
vec![ #(#fields,)* ],
),
#mode,
))
} else {
quote!(DataType::Union(std::sync::Arc::new(vec![ #(#fields,)* ]), None, #mode))
quote!(DataType::Union(UnionFields::from(vec![ #(#fields,)* ]), #mode))
}
}

DataType::Struct(fields) => {
let fields = fields.iter().map(ArrowFieldTokenizer);
quote!(DataType::Struct(std::sync::Arc::new(vec![ #(#fields,)* ])))
quote!(DataType::Struct(Fields::from(vec![ #(#fields,)* ])))
}

DataType::Extension(fqname, datatype, _metadata) => {
if *recursive {
let fqname_use = quote_fqname_as_type_path(fqname);
quote!(<#fqname_use>::arrow2_datatype())
quote!(<#fqname_use>::arrow_datatype())
} else {
let datatype = ArrowDataTypeTokenizer(datatype.to_logical_type(), false);
quote!(#datatype)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub fn quote_arrow_deserializer(
let data_src = format_ident!("arrow_data");

let datatype = &arrow_registry.get(&obj.fqname);
let quoted_self_datatype = quote! { Self::arrow2_datatype() };
let quoted_self_datatype = quote! { Self::arrow_datatype() };

let obj_fqname = obj.fqname.as_str();
let is_enum = obj.is_enum();
Expand Down
26 changes: 13 additions & 13 deletions crates/build/re_types_builder/src/codegen/rust/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn quote_arrow_serializer(
) -> TokenStream {
let datatype = &arrow_registry.get(&obj.fqname);

let quoted_datatype = quote! { Self::arrow2_datatype() };
let quoted_datatype = quote! { Self::arrow_datatype() };

let is_enum = obj.is_enum();
let is_arrow_transparent = obj.datatype.is_none();
Expand Down Expand Up @@ -211,7 +211,7 @@ pub fn quote_arrow_serializer(
#quoted_bitmap;

StructArray::new(
#quoted_datatype,
#quoted_datatype.into(),
vec![#(#quoted_field_serializers,)*],
bitmap,
).boxed()
Expand Down Expand Up @@ -257,14 +257,14 @@ pub fn quote_arrow_serializer(

let fields: Vec<_> = std::iter::repeat(
NullArray::new(
DataType::Null,
arrow2::datatypes::DataType::Null,
#data_src.len(),
).boxed()
).take(1 + num_variants) // +1 for the virtual `nulls` arm
.collect();

UnionArray::new(
#quoted_datatype,
#quoted_datatype.into(),
types,
fields,
None,
Expand All @@ -290,7 +290,7 @@ pub fn quote_arrow_serializer(
if obj_field.typ == crate::Type::Unit {
return quote! {
NullArray::new(
DataType::Null,
arrow2::datatypes::DataType::Null,
#data_src
.iter()
.filter(|datum| matches!(datum.as_deref(), Some(Self::#quoted_obj_field_name)))
Expand Down Expand Up @@ -336,7 +336,7 @@ pub fn quote_arrow_serializer(
let quoted_fields = quote! {
vec![
NullArray::new(
DataType::Null,
arrow2::datatypes::DataType::Null,
#data_src.iter().filter(|v| v.is_none()).count(),
).boxed(),
#(#quoted_field_serializers,)*
Expand Down Expand Up @@ -426,7 +426,7 @@ pub fn quote_arrow_serializer(
let offsets = Some(#quoted_offsets);

UnionArray::new(
#quoted_datatype,
#quoted_datatype.into(),
types,
fields,
offsets,
Expand Down Expand Up @@ -547,7 +547,7 @@ fn quote_arrow_field_serializer(
if datatype.to_logical_type() == &DataType::Boolean {
quote! {
BooleanArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#data_src.into_iter() #quoted_transparent_mapping .collect(),
#bitmap_src,
).boxed()
Expand All @@ -558,14 +558,14 @@ fn quote_arrow_field_serializer(
// to a buffer type.
InnerRepr::ArrowBuffer => quote! {
PrimitiveArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#data_src,
#bitmap_src,
).boxed()
},
InnerRepr::NativeIterable => quote! {
PrimitiveArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#data_src.into_iter() #quoted_transparent_mapping .collect(),
#bitmap_src,
).boxed()
Expand Down Expand Up @@ -654,7 +654,7 @@ fn quote_arrow_field_serializer(
// It would be nice to use quote_comment here and put this safety notice in the generated code,
// but that seems to push us over some complexity limit causing rustfmt to fail.
#[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
unsafe { Utf8Array::<i32>::new_unchecked(#quoted_datatype, offsets, inner_data, #bitmap_src) }.boxed()
unsafe { Utf8Array::<i32>::new_unchecked(#quoted_datatype.into(), offsets, inner_data, #bitmap_src) }.boxed()
}}
}

Expand Down Expand Up @@ -809,7 +809,7 @@ fn quote_arrow_field_serializer(
let quoted_create = if let DataType::List(_) = datatype {
quote! {
ListArray::try_new(
#quoted_datatype,
#quoted_datatype.into(),
offsets,
#quoted_inner,
#bitmap_src,
Expand All @@ -818,7 +818,7 @@ fn quote_arrow_field_serializer(
} else {
quote! {
FixedSizeListArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#quoted_inner,
#bitmap_src,
).boxed()
Expand Down
1 change: 1 addition & 0 deletions crates/store/re_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ re_video = { workspace = true, optional = true }
# External
anyhow.workspace = true # TODO(#1845): Use thiserror instead
array-init.workspace = true
arrow.workspace = true
arrow2 = { workspace = true, features = [
"io_ipc",
"io_print",
Expand Down
4 changes: 2 additions & 2 deletions crates/store/re_types/src/blueprint/components/active_tab.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f9ba68b

Please sign in to comment.