Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Port codegen of arrow datatype to arrow1 #8206

Merged
merged 5 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6264,6 +6264,7 @@ version = "0.21.0-alpha.1+dev"
dependencies = [
"anyhow",
"array-init",
"arrow",
"bytemuck",
"document-features",
"ecolor",
Expand Down Expand Up @@ -6302,6 +6303,7 @@ dependencies = [
name = "re_types_blueprint"
version = "0.21.0-alpha.1+dev"
dependencies = [
"arrow",
"once_cell",
"re_arrow2",
"re_tracing",
Expand Down
16 changes: 8 additions & 8 deletions crates/build/re_types_builder/src/codegen/cpp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::collections::HashSet;

use camino::{Utf8Path, Utf8PathBuf};
use itertools::Itertools;
use proc_macro2::{Ident, TokenStream};
use proc_macro2::{Ident, Literal, TokenStream};
use quote::{format_ident, quote};
use rayon::prelude::*;

Expand Down Expand Up @@ -1241,7 +1241,7 @@ impl QuotedObject {
let field_name = field_name_identifier(obj_field);

// We assign the arrow type index to the enum fields to make encoding simpler and faster:
let arrow_type_index = proc_macro2::Literal::usize_unsuffixed(enum_value as _);
let arrow_type_index = Literal::usize_unsuffixed(enum_value as _);

quote! {
#NEWLINE_TOKEN
Expand Down Expand Up @@ -1417,7 +1417,7 @@ fn arrow_data_type_method(
cpp_includes.insert_system("arrow/type_fwd.h");
hpp_declarations.insert("arrow", ForwardDecl::Class(format_ident!("DataType")));

let quoted_datatype = quote_arrow_data_type(
let quoted_datatype = quote_arrow_datatype(
&Type::Object(obj.fqname.clone()),
objects,
cpp_includes,
Expand Down Expand Up @@ -2234,7 +2234,7 @@ fn quote_field_type(includes: &mut Includes, obj_field: &ObjectField) -> TokenSt
Type::Array { elem_type, length } => {
includes.insert_system("array");
let elem_type = quote_element_type(includes, elem_type);
let length = proc_macro2::Literal::usize_unsuffixed(*length);
let length = Literal::usize_unsuffixed(*length);
quote! { std::array<#elem_type, #length> }
}
Type::Vector { elem_type } => {
Expand Down Expand Up @@ -2401,7 +2401,7 @@ fn quote_integer<T: std::fmt::Display>(t: T) -> TokenStream {
quote!(#t)
}

fn quote_arrow_data_type(
fn quote_arrow_datatype(
typ: &Type,
objects: &Objects,
includes: &mut Includes,
Expand Down Expand Up @@ -2444,7 +2444,7 @@ fn quote_arrow_data_type(
let quoted_fqname = quote_fqname_as_type_path(includes, fqname);
quote!(Loggable<#quoted_fqname>::arrow_datatype())
} else if obj.is_arrow_transparent() {
quote_arrow_data_type(&obj.fields[0].typ, objects, includes, false)
quote_arrow_datatype(&obj.fields[0].typ, objects, includes, false)
} else {
let quoted_fields = obj
.fields
Expand Down Expand Up @@ -2480,7 +2480,7 @@ fn quote_arrow_field_type(
includes: &mut Includes,
) -> TokenStream {
let name = &field.name;
let datatype = quote_arrow_data_type(&field.typ, objects, includes, false);
let datatype = quote_arrow_datatype(&field.typ, objects, includes, false);
let is_nullable = field.is_nullable || field.typ == Type::Unit; // null type is always nullable

quote! {
Expand All @@ -2494,7 +2494,7 @@ fn quote_arrow_elem_type(
includes: &mut Includes,
) -> TokenStream {
let typ: Type = elem_type.clone().into();
let datatype = quote_arrow_data_type(&typ, objects, includes, false);
let datatype = quote_arrow_datatype(&typ, objects, includes, false);
let is_nullable = typ == Type::Unit; // null type must be nullable
quote! {
arrow::field("item", #datatype, #is_nullable)
Expand Down
17 changes: 10 additions & 7 deletions crates/build/re_types_builder/src/codegen/rust/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -864,17 +864,17 @@ fn quote_trait_impls_for_datatype_or_component(
let quoted_arrow_datatype = if let Some(forwarded_type) = forwarded_type.as_ref() {
quote! {
#[inline]
fn arrow2_datatype() -> arrow2::datatypes::DataType {
#forwarded_type::arrow2_datatype()
fn arrow_datatype() -> arrow::datatypes::DataType {
#forwarded_type::arrow_datatype()
}
}
} else {
let datatype = ArrowDataTypeTokenizer(&datatype, false);
quote! {
#[inline]
fn arrow2_datatype() -> arrow2::datatypes::DataType {
fn arrow_datatype() -> arrow::datatypes::DataType {
#![allow(clippy::wildcard_imports)]
use arrow2::datatypes::*;
use arrow::datatypes::*;
#datatype
}
}
Expand Down Expand Up @@ -906,7 +906,8 @@ fn quote_trait_impls_for_datatype_or_component(
// re_tracing::profile_function!();

#![allow(clippy::wildcard_imports)]
use arrow2::{datatypes::*, array::*, buffer::*};
use arrow::datatypes::*;
use arrow2::{ array::*, buffer::*};
use ::re_types_core::{Loggable as _, ResultExt as _};

// This code-path cannot have null fields. If it does have a validity mask
Expand Down Expand Up @@ -948,7 +949,8 @@ fn quote_trait_impls_for_datatype_or_component(
// re_tracing::profile_function!();

#![allow(clippy::wildcard_imports)]
use arrow2::{datatypes::*, array::*, buffer::*};
use arrow::datatypes::*;
use arrow2::{ array::*, buffer::*};
use ::re_types_core::{Loggable as _, ResultExt as _};
Ok(#quoted_deserializer)
}
Expand Down Expand Up @@ -987,7 +989,8 @@ fn quote_trait_impls_for_datatype_or_component(

#![allow(clippy::wildcard_imports)]
#![allow(clippy::manual_is_variant_and)]
use arrow2::{datatypes::*, array::*};
use arrow::datatypes::*;
use arrow2::array::*;
use ::re_types_core::{Loggable as _, ResultExt as _};

Ok(#quoted_serializer)
Expand Down
20 changes: 14 additions & 6 deletions crates/build/re_types_builder/src/codegen/rust/arrow.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use arrow2::datatypes::DataType;
use proc_macro2::TokenStream;
use proc_macro2::{Literal, TokenStream};
use quote::quote;

// ---
Expand Down Expand Up @@ -37,6 +37,7 @@ impl quote::ToTokens for ArrowDataTypeTokenizer<'_> {

DataType::FixedSizeList(field, length) => {
let field = ArrowFieldTokenizer(field);
let length = Literal::usize_unsuffixed(*length);
quote!(DataType::FixedSizeList(std::sync::Arc::new(#field), #length))
}

Expand All @@ -47,25 +48,32 @@ impl quote::ToTokens for ArrowDataTypeTokenizer<'_> {
UnionMode::Sparse => quote!(UnionMode::Sparse),
};
if let Some(types) = types {
let types = types.iter().map(|&t| {
Literal::i8_unsuffixed(i8::try_from(t).unwrap_or_else(|_| {
panic!("Expect union type tag to be in 0-127; got {t}")
}))
});
quote!(DataType::Union(
std::sync::Arc::new(vec![ #(#fields,)* ]),
Some(std::sync::Arc::new(vec![ #(#types,)* ])),
UnionFields::new(
vec![ #(#types,)* ],
vec![ #(#fields,)* ],
),
#mode,
))
} else {
quote!(DataType::Union(std::sync::Arc::new(vec![ #(#fields,)* ]), None, #mode))
quote!(DataType::Union(UnionFields::from(vec![ #(#fields,)* ]), #mode))
}
}

DataType::Struct(fields) => {
let fields = fields.iter().map(ArrowFieldTokenizer);
quote!(DataType::Struct(std::sync::Arc::new(vec![ #(#fields,)* ])))
quote!(DataType::Struct(Fields::from(vec![ #(#fields,)* ])))
}

DataType::Extension(fqname, datatype, _metadata) => {
if *recursive {
let fqname_use = quote_fqname_as_type_path(fqname);
quote!(<#fqname_use>::arrow2_datatype())
quote!(<#fqname_use>::arrow_datatype())
} else {
let datatype = ArrowDataTypeTokenizer(datatype.to_logical_type(), false);
quote!(#datatype)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub fn quote_arrow_deserializer(
let data_src = format_ident!("arrow_data");

let datatype = &arrow_registry.get(&obj.fqname);
let quoted_self_datatype = quote! { Self::arrow2_datatype() };
let quoted_self_datatype = quote! { Self::arrow_datatype() };

let obj_fqname = obj.fqname.as_str();
let is_enum = obj.is_enum();
Expand Down
26 changes: 13 additions & 13 deletions crates/build/re_types_builder/src/codegen/rust/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn quote_arrow_serializer(
) -> TokenStream {
let datatype = &arrow_registry.get(&obj.fqname);

let quoted_datatype = quote! { Self::arrow2_datatype() };
let quoted_datatype = quote! { Self::arrow_datatype() };

let is_enum = obj.is_enum();
let is_arrow_transparent = obj.datatype.is_none();
Expand Down Expand Up @@ -211,7 +211,7 @@ pub fn quote_arrow_serializer(
#quoted_bitmap;

StructArray::new(
#quoted_datatype,
#quoted_datatype.into(),
vec![#(#quoted_field_serializers,)*],
bitmap,
).boxed()
Expand Down Expand Up @@ -257,14 +257,14 @@ pub fn quote_arrow_serializer(

let fields: Vec<_> = std::iter::repeat(
NullArray::new(
DataType::Null,
arrow2::datatypes::DataType::Null,
#data_src.len(),
).boxed()
).take(1 + num_variants) // +1 for the virtual `nulls` arm
.collect();

UnionArray::new(
#quoted_datatype,
#quoted_datatype.into(),
types,
fields,
None,
Expand All @@ -290,7 +290,7 @@ pub fn quote_arrow_serializer(
if obj_field.typ == crate::Type::Unit {
return quote! {
NullArray::new(
DataType::Null,
arrow2::datatypes::DataType::Null,
#data_src
.iter()
.filter(|datum| matches!(datum.as_deref(), Some(Self::#quoted_obj_field_name)))
Expand Down Expand Up @@ -336,7 +336,7 @@ pub fn quote_arrow_serializer(
let quoted_fields = quote! {
vec![
NullArray::new(
DataType::Null,
arrow2::datatypes::DataType::Null,
#data_src.iter().filter(|v| v.is_none()).count(),
).boxed(),
#(#quoted_field_serializers,)*
Expand Down Expand Up @@ -426,7 +426,7 @@ pub fn quote_arrow_serializer(
let offsets = Some(#quoted_offsets);

UnionArray::new(
#quoted_datatype,
#quoted_datatype.into(),
types,
fields,
offsets,
Expand Down Expand Up @@ -547,7 +547,7 @@ fn quote_arrow_field_serializer(
if datatype.to_logical_type() == &DataType::Boolean {
quote! {
BooleanArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#data_src.into_iter() #quoted_transparent_mapping .collect(),
#bitmap_src,
).boxed()
Expand All @@ -558,14 +558,14 @@ fn quote_arrow_field_serializer(
// to a buffer type.
InnerRepr::ArrowBuffer => quote! {
PrimitiveArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#data_src,
#bitmap_src,
).boxed()
},
InnerRepr::NativeIterable => quote! {
PrimitiveArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#data_src.into_iter() #quoted_transparent_mapping .collect(),
#bitmap_src,
).boxed()
Expand Down Expand Up @@ -654,7 +654,7 @@ fn quote_arrow_field_serializer(
// It would be nice to use quote_comment here and put this safety notice in the generated code,
// but that seems to push us over some complexity limit causing rustfmt to fail.
#[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
unsafe { Utf8Array::<i32>::new_unchecked(#quoted_datatype, offsets, inner_data, #bitmap_src) }.boxed()
unsafe { Utf8Array::<i32>::new_unchecked(#quoted_datatype.into(), offsets, inner_data, #bitmap_src) }.boxed()
}}
}

Expand Down Expand Up @@ -809,7 +809,7 @@ fn quote_arrow_field_serializer(
let quoted_create = if let DataType::List(_) = datatype {
quote! {
ListArray::try_new(
#quoted_datatype,
#quoted_datatype.into(),
offsets,
#quoted_inner,
#bitmap_src,
Expand All @@ -818,7 +818,7 @@ fn quote_arrow_field_serializer(
} else {
quote! {
FixedSizeListArray::new(
#quoted_datatype,
#quoted_datatype.into(),
#quoted_inner,
#bitmap_src,
).boxed()
Expand Down
1 change: 1 addition & 0 deletions crates/store/re_types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ re_video = { workspace = true, optional = true }
# External
anyhow.workspace = true # TODO(#1845): Use thiserror instead
array-init.workspace = true
arrow.workspace = true
arrow2 = { workspace = true, features = [
"io_ipc",
"io_print",
Expand Down
4 changes: 2 additions & 2 deletions crates/store/re_types/src/blueprint/components/active_tab.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading