diff --git a/crates/nano-arrow/src/io/parquet/read/schema/convert.rs b/crates/nano-arrow/src/io/parquet/read/schema/convert.rs index 4ae50e05e8e0..4f55ffb872d3 100644 --- a/crates/nano-arrow/src/io/parquet/read/schema/convert.rs +++ b/crates/nano-arrow/src/io/parquet/read/schema/convert.rs @@ -343,12 +343,12 @@ fn to_list( let field = fields.first().unwrap(); ( &field.get_field_info().name, - field.get_field_info().repetition != Repetition::Required, + field.get_field_info().repetition == Repetition::Optional, ) }, _ => ( &item.get_field_info().name, - item.get_field_info().repetition != Repetition::Required, + item.get_field_info().repetition == Repetition::Optional, ), }; @@ -596,7 +596,7 @@ mod tests { { arrow_fields.push(Field::new( "my_list", - DataType::List(Box::new(Field::new("element", DataType::Utf8, true))), + DataType::List(Box::new(Field::new("element", DataType::Utf8, false))), true, )); } @@ -608,7 +608,7 @@ mod tests { { arrow_fields.push(Field::new( "my_list", - DataType::List(Box::new(Field::new("element", DataType::Int32, true))), + DataType::List(Box::new(Field::new("element", DataType::Int32, false))), true, )); } @@ -627,7 +627,7 @@ mod tests { ]); arrow_fields.push(Field::new( "my_list", - DataType::List(Box::new(Field::new("element", arrow_struct, true))), + DataType::List(Box::new(Field::new("element", arrow_struct, false))), true, )); } @@ -643,7 +643,7 @@ mod tests { let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]); arrow_fields.push(Field::new( "my_list", - DataType::List(Box::new(Field::new("array", arrow_struct, true))), + DataType::List(Box::new(Field::new("array", arrow_struct, false))), true, )); } @@ -659,7 +659,7 @@ mod tests { let arrow_struct = DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]); arrow_fields.push(Field::new( "my_list", - DataType::List(Box::new(Field::new("my_list_tuple", arrow_struct, true))), + DataType::List(Box::new(Field::new("my_list_tuple", arrow_struct, false))), true, )); } @@ -669,8 +669,50 @@ mod tests { { arrow_fields.push(Field::new( "name", - DataType::List(Box::new(Field::new("name", DataType::Int32, true))), - true, + DataType::List(Box::new(Field::new("name", DataType::Int32, false))), + false, + )); + } + + let parquet_schema = SchemaDescriptor::try_from_message(message_type)?; + let fields = parquet_to_arrow_schema(parquet_schema.fields()); + + assert_eq!(arrow_fields, fields); + Ok(()) + } + + #[test] + fn test_parquet_list_with_struct() -> Result<()> { + let mut arrow_fields = Vec::new(); + + let message_type = " + message eventlog { + REQUIRED group events (LIST) { + REPEATED group array { + REQUIRED BYTE_ARRAY event_name (STRING); + REQUIRED INT64 event_time (TIMESTAMP(MILLIS,true)); + } + } + } + "; + + { + let struct_fields = vec![ + Field::new("event_name", DataType::Utf8, false), + Field::new( + "event_time", + DataType::Timestamp(TimeUnit::Millisecond, Some("+00:00".into())), + false, + ), + ]; + arrow_fields.push(Field::new( + "events", + DataType::List(Box::new(Field::new( + "array", + DataType::Struct(struct_fields), + false, + ))), + false, )); } @@ -797,9 +839,9 @@ mod tests { DataType::List(Box::new(Field::new( "innerGroup", DataType::Struct(vec![Field::new("leaf3", DataType::Int32, true)]), - true, + false, ))), - true, + false, ); let outer_group_list = Field::new( @@ -810,9 +852,9 @@ mod tests { Field::new("leaf2", DataType::Int32, true), inner_group_list, ]), - true, + false, ))), - true, + false, ); arrow_fields.push(outer_group_list); } @@ -873,8 +915,8 @@ mod tests { Field::new("string", DataType::Utf8, true), Field::new( "bools", - DataType::List(Box::new(Field::new("bools", DataType::Boolean, true))), - true, + DataType::List(Box::new(Field::new("bools", DataType::Boolean, false))), + false, ), Field::new("date", DataType::Date32, true), Field::new("time_milli", DataType::Time32(TimeUnit::Millisecond), true),