Skip to content

Commit

Permalink
c
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion committed Aug 4, 2024
1 parent 69b7ffd commit a1810b0
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
28 changes: 18 additions & 10 deletions crates/polars-mem-engine/src/executors/scan/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ impl ParquetExec {
// Walk the files in reverse until we find the first file, and then translate the
// slice into a positive-offset equivalent.
let slice_start_as_n_from_end = -slice.0 as usize;
let slice_end_as_n_from_end = slice_start_as_n_from_end.saturating_sub(slice.1);
let mut cum_rows = 0;
let chunk_size = 8;
POOL.install(|| {
Expand Down Expand Up @@ -105,12 +104,17 @@ impl ParquetExec {
PolarsResult::Ok(())
})?;

let start = cum_rows.saturating_sub(slice_start_as_n_from_end);
let end = if slice_end_as_n_from_end >= cum_rows {
0
let (start, len) = if slice_start_as_n_from_end > cum_rows {
// The slice starts before the first row, so we need to trim its length by the
// overshoot, e.g. SLICE[offset: -100, len: 75] on a file of 50 rows should only
// give the first 25 rows.
let first_file_position = slice_start_as_n_from_end - cum_rows;
(0, slice.1.saturating_sub(first_file_position))
} else {
start + slice.1
(cum_rows - slice_start_as_n_from_end, slice.1)
};

let end = start.saturating_add(len);

(start, end)
}
} else {
Expand Down Expand Up @@ -263,7 +267,6 @@ impl ParquetExec {
// Walk the files in reverse until we find the first file, and then translate the
// slice into a positive-offset equivalent.
let slice_start_as_n_from_end = -slice.0 as usize;
let slice_end_as_n_from_end = slice_start_as_n_from_end.saturating_sub(slice.1);
let mut cum_rows = 0;

let paths = &self.paths;
Expand Down Expand Up @@ -303,12 +306,17 @@ impl ParquetExec {
}
}

let start = cum_rows.saturating_sub(slice_start_as_n_from_end);
let end = if slice_end_as_n_from_end >= cum_rows {
0
let (start, len) = if slice_start_as_n_from_end > cum_rows {
// The slice starts before the first row, so we need to trim its length by the
// overshoot, e.g. SLICE[offset: -100, len: 75] on a file of 50 rows should only
// give the first 25 rows.
let first_file_position = slice_start_as_n_from_end - cum_rows;
(0, slice.1.saturating_sub(first_file_position))
} else {
start + slice.1
(cum_rows - slice_start_as_n_from_end, slice.1)
};

let end = start.saturating_add(len);

(start, end)
}
} else {
Expand Down
8 changes: 8 additions & 0 deletions py-polars/tests/unit/io/test_lazy_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,3 +517,11 @@ def trim_to_metadata(path: str | Path) -> None:
assert_frame_equal(
pl.scan_parquet(paths[1:]).slice(-99, 1).collect(), df.clear()
)

path = tmp_path / "data"
df = pl.select(x=pl.int_range(0, 50))
df.write_parquet(path)
assert_frame_equal(pl.scan_parquet(path).slice(-100, 75).collect(), df.head(25))
assert_frame_equal(
pl.scan_parquet(path).slice(-1, (1 << 32) - 1).collect(), df.tail(1)
)

0 comments on commit a1810b0

Please sign in to comment.