diff --git a/src/arrow_reader.rs b/src/arrow_reader.rs index 71ba2f57..6c3d156f 100644 --- a/src/arrow_reader.rs +++ b/src/arrow_reader.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::sync::Arc; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::SchemaRef; use arrow::error::ArrowError; use arrow::record_batch::{RecordBatch, RecordBatchReader}; @@ -52,6 +52,26 @@ impl ArrowReaderBuilder { self.schema_ref = Some(schema); self } + + /// Returns the currently computed schema + /// + /// Unless [`with_schema`](Self::with_schema) was called, this is computed dynamically + /// based on the current projection and the underlying file format. + pub fn schema(&self) -> SchemaRef { + let projected_data_type = self + .file_metadata + .root_data_type() + .project(&self.projection); + let metadata = self + .file_metadata + .user_custom_metadata() + .iter() + .map(|(key, value)| (key.clone(), String::from_utf8_lossy(value).to_string())) + .collect::>(); + self.schema_ref + .clone() + .unwrap_or_else(|| Arc::new(projected_data_type.create_arrow_schema(&metadata))) + } } impl ArrowReaderBuilder { @@ -61,6 +81,7 @@ impl ArrowReaderBuilder { } pub fn build(self) -> ArrowReader { + let schema_ref = self.schema(); let projected_data_type = self .file_metadata .root_data_type() @@ -71,9 +92,6 @@ impl ArrowReaderBuilder { projected_data_type, stripe_index: 0, }; - let schema_ref = self - .schema_ref - .unwrap_or_else(|| Arc::new(create_arrow_schema(&cursor))); ArrowReader { cursor, schema_ref, @@ -111,16 +129,6 @@ impl ArrowReader { } } -pub(crate) fn create_arrow_schema(cursor: &Cursor) -> Schema { - let metadata = cursor - .file_metadata - .user_custom_metadata() - .iter() - .map(|(key, value)| (key.clone(), String::from_utf8_lossy(value).to_string())) - .collect::>(); - cursor.projected_data_type.create_arrow_schema(&metadata) -} - impl RecordBatchReader for ArrowReader { fn schema(&self) -> SchemaRef { self.schema_ref.clone() diff --git a/src/async_arrow_reader.rs b/src/async_arrow_reader.rs index 520b38b7..1e675e62 100644 --- a/src/async_arrow_reader.rs +++ b/src/async_arrow_reader.rs @@ -11,7 +11,7 @@ use futures::{ready, Stream}; use futures_util::FutureExt; use crate::array_decoder::NaiveStripeDecoder; -use crate::arrow_reader::{create_arrow_schema, Cursor}; +use crate::arrow_reader::Cursor; use crate::error::Result; use crate::reader::metadata::read_metadata_async; use crate::reader::AsyncChunkReader; @@ -191,13 +191,13 @@ impl ArrowReaderBuilder { .file_metadata() .root_data_type() .project(&self.projection); + let schema_ref = self.schema(); let cursor = Cursor { reader: self.reader, file_metadata: self.file_metadata, projected_data_type, stripe_index: 0, }; - let schema_ref = Arc::new(create_arrow_schema(&cursor)); ArrowStreamReader::new(cursor, self.batch_size, schema_ref) } }