Skip to content

Commit

Permalink
Correctly handle empty bloom filters, avoid infinite loop (#9)
Browse files Browse the repository at this point in the history
`HashIndexIterator` used to loop infinitely when `bit_size` was 0,
because it wouldn't be able to generate a random index that's `< 0`
(fair enough).

Now it simply exits instantly (returning `None`) if `bit_size == 0`.

Also added a test case for empty blooms. They "technically" contain
everything as a false positive. I opted for that, rather than "an empty
bloom contains nothing", since that keeps the invariant that if you
`.insert` something into a bloom filter, it will *always* be
`contain`ed.
  • Loading branch information
matheus23 authored Aug 21, 2023
1 parent a8cd85b commit e668ca4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
16 changes: 16 additions & 0 deletions deterministic-bloom/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ impl<T: AsRef<[u8]>> Iterator for HashIndexIterator<'_, T> {
type Item = usize;

fn next(&mut self) -> Option<Self::Item> {
if self.bit_size == 0 {
// This avoids an infinite loop in rejection sampling.
return None;
}

let bit_size_po2 = self.bit_size.next_power_of_two();
loop {
let hash = xxh3::xxh3_64_with_seed(self.item.as_ref(), self.index) as usize;
Expand Down Expand Up @@ -190,6 +195,17 @@ impl BloomParams {
}
}

#[cfg(test)]
mod tests {
    use super::HashIndexIterator;

    /// Regression test: with a bit size of zero there is no valid index to
    /// produce, so the iterator must report exhaustion on the very first
    /// call instead of spinning forever in rejection sampling.
    #[test]
    fn test_zero_bit_size() {
        let mut indices = HashIndexIterator::new(&[1, 2, 3], 0);
        assert!(indices.next().is_none());
    }
}

#[cfg(test)]
mod proptests {
use super::BloomParams;
Expand Down
9 changes: 8 additions & 1 deletion deterministic-bloom/src/runtime_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,13 @@ mod tests {
assert!(!deserialized.contains(b"abc"));
assert_eq!(deserialized, filter);
}

/// A bloom filter built over an empty byte buffer reports every item as
/// present: with no bits to check, every lookup is a false positive.
#[test]
fn empty_bloom_filter() {
    let zero_sized = BloomFilter::new_with(3, Box::new([]));
    // "Contains everything" (rather than "contains nothing") is the expected
    // answer for an empty bloom, since everything is a false positive.
    assert!(zero_sized.contains(&[1, 2, 3]));
}
}

#[cfg(test)]
Expand Down Expand Up @@ -328,6 +335,6 @@ mod proptests {

let computed_fpr = false_positives as f64 / measurements as f64;
// The actual FPR should be pretty close
prop_assert!((computed_fpr - fpr).abs() < 1e-3);
prop_assert!((computed_fpr - fpr).abs() < 1.5e-3);
}
}

0 comments on commit e668ca4

Please sign in to comment.