From 2175e2a07bd997d8e58ea831756036daa41f49f0 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Mon, 2 Dec 2024 22:52:55 +0100 Subject: [PATCH] fix(chunker): Don't underflow with wrong size_hint --- crates/core/src/chunker.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/crates/core/src/chunker.rs b/crates/core/src/chunker.rs index efec581b..63ca5c28 100644 --- a/crates/core/src/chunker.rs +++ b/crates/core/src/chunker.rs @@ -16,8 +16,8 @@ pub(super) mod constants { pub(super) const MIN_SIZE: usize = 512 * KB; /// The maximum size of a chunk. pub(super) const MAX_SIZE: usize = 8 * MB; - /// Buffer size used for reading. - pub(super) const BUF_SIZE: usize = 64 * KB; + /// Buffer size used for reading - TODO: Find out optimal size for best performance! + pub(super) const BUF_SIZE: usize = 4 * KB; /// Random polynomial maximum tries. pub(super) const RAND_POLY_MAX_TRIES: i32 = 1_000_000; } @@ -68,8 +68,8 @@ impl ChunkIter { /// * `rabin` - The rolling hash. pub(crate) fn new(reader: R, size_hint: usize, rabin: Rabin64) -> Self { Self { - buf: Vec::with_capacity(4 * constants::KB), - pos: 0, + buf: vec![0; constants::BUF_SIZE], + pos: constants::BUF_SIZE, reader, predicate: default_predicate, rabin, @@ -137,8 +137,6 @@ impl Iterator for ChunkIter { } if self.buf.len() == self.pos { - // TODO: use a possibly uninitialized buffer here - self.buf.resize(constants::BUF_SIZE, 0); match self.reader.read(&mut self.buf[..]) { Ok(0) => { self.finished = true; @@ -165,7 +163,7 @@ impl Iterator for ChunkIter { self.pos += 1; self.rabin.slide(byte); } - self.size_hint -= vec.len(); + self.size_hint = self.size_hint.saturating_sub(vec.len()); // size_hint can be too small! Some(Ok(vec)) } }