Skip to content

Commit

Permalink
refactor: Implement CSV, IPC and NDJson in the MultiScanExec node (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Jan 10, 2025
1 parent 7cdc057 commit 239d7c8
Show file tree
Hide file tree
Showing 13 changed files with 490 additions and 339 deletions.
15 changes: 12 additions & 3 deletions crates/polars-arrow/src/io/ipc/read/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,24 @@ pub struct FileMetadata {

/// Read the row count by summing the length of the of the record batches
pub fn get_row_count<R: Read + Seek>(reader: &mut R) -> PolarsResult<i64> {
let mut message_scratch: Vec<u8> = Default::default();
let (_, footer_len) = read_footer_len(reader)?;
let footer = read_footer(reader, footer_len)?;
let (_, blocks) = deserialize_footer_blocks(&footer)?;

get_row_count_from_blocks(reader, &blocks)
}

/// Read the row count by summing the length of the of the record batches in blocks
pub fn get_row_count_from_blocks<R: Read + Seek>(
reader: &mut R,
blocks: &[arrow_format::ipc::Block],
) -> PolarsResult<i64> {
let mut message_scratch: Vec<u8> = Default::default();

blocks
.into_iter()
.iter()
.map(|block| {
let message = get_message_from_block(reader, &block, &mut message_scratch)?;
let message = get_message_from_block(reader, block, &mut message_scratch)?;
let record_batch = get_record_batch(message)?;
record_batch.length().map_err(|e| e.into())
})
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/io/ipc/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ pub(crate) use common::first_dict_field;
pub use common::{prepare_projection, ProjectionInfo};
pub use error::OutOfSpecKind;
pub use file::{
deserialize_footer, get_row_count, read_batch, read_file_dictionaries, read_file_metadata,
FileMetadata,
deserialize_footer, get_row_count, get_row_count_from_blocks, read_batch,
read_file_dictionaries, read_file_metadata, FileMetadata,
};
use polars_utils::aliases::PlHashMap;
pub use reader::FileReader;
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-mem-engine/src/executors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ mod group_by_dynamic;
mod group_by_partitioned;
pub(super) mod group_by_rolling;
mod hconcat;
mod hive_scan;
mod join;
mod multi_file_scan;
mod projection;
mod projection_simple;
mod projection_utils;
Expand Down Expand Up @@ -39,8 +39,8 @@ pub(super) use self::group_by_partitioned::*;
#[cfg(feature = "dynamic_group_by")]
pub(super) use self::group_by_rolling::GroupByRollingExec;
pub(super) use self::hconcat::*;
pub(super) use self::hive_scan::*;
pub(super) use self::join::*;
pub(super) use self::multi_file_scan::*;
pub(super) use self::projection::*;
pub(super) use self::projection_simple::*;
pub(super) use self::scan::*;
Expand Down
Loading

0 comments on commit 239d7c8

Please sign in to comment.