Skip to content

Commit

Permalink
histogram: fix percentiles for empty histograms (#116)
Browse files Browse the repository at this point in the history
When the histogram is empty, the percentiles function
currently returns the first bucket for the standard
histogram and panics due to an underflow in the sparse
histogram. Change the signature to return an Optional
value, which is set to None in the empty histogram case
for both standard and sparse histograms.

Also, add a batched percentile lookup for the sparse
histogram to bring the interface in line with the
standard histogram.
  • Loading branch information
mihirn authored Jun 12, 2024
1 parent 8e41e0d commit 696f958
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 54 deletions.
2 changes: 1 addition & 1 deletion histogram/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "histogram"
version = "0.10.2"
version = "0.11.0"
edition = "2021"
authors = ["Brian Martin <brian@pelikan.io>"]
license = "MIT OR Apache-2.0"
Expand Down
52 changes: 36 additions & 16 deletions histogram/src/atomic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,50 +97,69 @@ mod test {
let percentiles = histogram.drain();
assert_eq!(
percentiles.percentile(50.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 50..=50,
})
}))
);
histogram.increment(1000).unwrap();
// after another load the map is empty
let percentiles = histogram.drain();
assert_eq!(
percentiles.percentile(50.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 1000..=1003,
})
}))
);
}

#[test]
// Tests percentiles
fn percentiles() {
let histogram = AtomicHistogram::new(7, 64).unwrap();
let percentiles = [25.0, 50.0, 75.0, 90.0, 99.0];

// check empty
assert_eq!(histogram.load().percentiles(&percentiles), Ok(None));

for percentile in percentiles {
assert_eq!(histogram.load().percentile(percentile), Ok(None));
}

// populate and check percentiles
for i in 0..=100 {
let _ = histogram.increment(i);
assert_eq!(
histogram.load().percentile(0.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 0..=0,
})
}))
);
assert_eq!(
histogram.load().percentile(100.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: i..=i,
})
}))
);
}

for percentile in percentiles {
assert_eq!(
histogram
.load()
.percentile(percentile)
.map(|b| b.unwrap().end()),
Ok(percentile as u64)
);
}
assert_eq!(histogram.load().percentile(25.0).map(|b| b.end()), Ok(25));
assert_eq!(histogram.load().percentile(50.0).map(|b| b.end()), Ok(50));
assert_eq!(histogram.load().percentile(75.0).map(|b| b.end()), Ok(75));
assert_eq!(histogram.load().percentile(90.0).map(|b| b.end()), Ok(90));
assert_eq!(histogram.load().percentile(99.0).map(|b| b.end()), Ok(99));
assert_eq!(histogram.load().percentile(99.9).map(|b| b.end()), Ok(100));

assert_eq!(
histogram.load().percentile(99.9).map(|b| b.unwrap().end()),
Ok(100)
);

assert_eq!(
histogram.load().percentile(-1.0),
Expand All @@ -155,6 +174,7 @@ mod test {
.load()
.percentiles(&[50.0, 90.0, 99.0, 99.9])
.unwrap()
.unwrap()
.iter()
.map(|(p, b)| (*p, b.end()))
.collect();
Expand All @@ -167,10 +187,10 @@ mod test {
let _ = histogram.increment(1024);
assert_eq!(
histogram.load().percentile(99.9),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 1024..=1031,
})
}))
);
}
}
82 changes: 62 additions & 20 deletions histogram/src/sparse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,35 +142,67 @@ impl SparseHistogram {
Ok(histogram)
}

/// Return a single percentile from this histogram.
/// Return a collection of percentiles from this histogram.
///
/// The percentile should be in the inclusive range `0.0..=100.0`. For
/// Each percentile should be in the inclusive range `0.0..=100.0`. For
/// example, the 50th percentile (median) can be found using `50.0`.
pub fn percentile(&self, percentile: f64) -> Result<Bucket, Error> {
///
/// The results will be sorted by the percentile.
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
// validate all the percentiles
if percentiles.is_empty() {
return Err(Error::InvalidPercentile);
}

for percentile in percentiles {
if !(0.0..=100.0).contains(percentile) {
return Err(Error::InvalidPercentile);
}
}

let total: u128 = self.count.iter().map(|v| *v as u128).sum();

if !(0.0..=100.0).contains(&percentile) {
return Err(Error::InvalidPercentile);
// empty histogram, no percentiles available
if total == 0 {
return Ok(None);
}

let search = ((total as f64) * percentile / 100.0).ceil() as usize;
// sort the requested percentiles so we can find them in a single pass
let mut percentiles = percentiles.to_vec();
percentiles.sort_by(|a, b| a.partial_cmp(b).unwrap());

let searches: Vec<usize> = percentiles
.iter()
.map(|p| ((total as f64) * *p / 100.0).ceil() as usize)
.collect();
let mut search_idx = 0;
let mut result: Vec<(f64, Bucket)> = Vec::with_capacity(percentiles.len());

let mut seen: usize = 0;
for (idx, count) in self.index.iter().zip(self.count.iter()) {
seen += *count as usize;
if seen >= search {
return Ok(Bucket {
count: *count,
range: self.config.index_to_range(*idx),
});
while search_idx < searches.len() && seen >= searches[search_idx] {
result.push((
percentiles[search_idx],
Bucket {
count: *count,
range: self.config.index_to_range(*idx),
},
));
search_idx += 1;
}
}

// should never be reached; return highest bucket if not found
let last_idx = self.index.len() - 1;
Ok(Bucket {
count: self.count[last_idx],
range: self.config.index_to_range(self.index[last_idx]),
})
Ok(Some(result))
}

/// Return a single percentile from this histogram.
///
/// The percentile should be in the inclusive range `0.0..=100.0`. For
/// example, the 50th percentile (median) can be found using `50.0`.
pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
self.percentiles(&[percentile])
.map(|v| v.map(|x| x.first().unwrap().1.clone()))
}

/// Returns a new histogram with a reduced grouping power. The reduced
Expand Down Expand Up @@ -384,20 +416,30 @@ mod tests {
}

#[test]
fn percentile() {
fn percentiles() {
let mut hstandard = Histogram::new(4, 10).unwrap();
let hempty = SparseHistogram::from(&hstandard);

for v in 1..1024 {
let _ = hstandard.increment(v);
}

let hsparse = SparseHistogram::from(&hstandard);

for percentile in [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9] {
let percentiles = [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9];
for percentile in percentiles {
let bempty = hempty.percentile(percentile).unwrap();
let bstandard = hstandard.percentile(percentile).unwrap();
let bsparse = hsparse.percentile(percentile).unwrap();

assert_eq!(bempty, None);
assert_eq!(bsparse, bstandard);
}

assert_eq!(hempty.percentiles(&percentiles), Ok(None));
assert_eq!(
hstandard.percentiles(&percentiles).unwrap(),
hsparse.percentiles(&percentiles).unwrap()
);
}

fn compare_histograms(hstandard: &Histogram, hsparse: &SparseHistogram) {
Expand Down
50 changes: 33 additions & 17 deletions histogram/src/standard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl Histogram {
/// example, the 50th percentile (median) can be found using `50.0`.
///
/// The results will be sorted by the percentile.
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Vec<(f64, Bucket)>, Error> {
pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
// get the total count
let total_count: u128 = self.buckets.iter().map(|v| *v as u128).sum();

Expand All @@ -91,6 +91,11 @@ impl Histogram {
}
}

// empty histogram, no percentiles available
if total_count == 0 {
return Ok(None);
}

let mut bucket_idx = 0;
let mut partial_sum = self.buckets[bucket_idx] as u128;

Expand Down Expand Up @@ -125,16 +130,16 @@ impl Histogram {
})
.collect();

Ok(result)
Ok(Some(result))
}

/// Return a single percentile from this histogram.
///
/// The percentile should be in the inclusive range `0.0..=100.0`. For
/// example, the 50th percentile (median) can be found using `50.0`.
pub fn percentile(&self, percentile: f64) -> Result<Bucket, Error> {
pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
self.percentiles(&[percentile])
.map(|v| v.first().unwrap().1.clone())
.map(|v| v.map(|x| x.first().unwrap().1.clone()))
}

/// Returns a new histogram with a reduced grouping power. The reduced
Expand Down Expand Up @@ -310,36 +315,47 @@ mod tests {
// Tests percentiles
fn percentiles() {
let mut histogram = Histogram::new(7, 64).unwrap();

assert_eq!(histogram.percentile(50.0).unwrap(), None);
assert_eq!(
histogram.percentiles(&[50.0, 90.0, 99.0, 99.9]).unwrap(),
None
);

for i in 0..=100 {
let _ = histogram.increment(i);
assert_eq!(
histogram.percentile(0.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 0..=0,
})
}))
);
assert_eq!(
histogram.percentile(100.0),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: i..=i,
})
}))
);
}
assert_eq!(histogram.percentile(25.0).map(|b| b.end()), Ok(25));
assert_eq!(histogram.percentile(50.0).map(|b| b.end()), Ok(50));
assert_eq!(histogram.percentile(75.0).map(|b| b.end()), Ok(75));
assert_eq!(histogram.percentile(90.0).map(|b| b.end()), Ok(90));
assert_eq!(histogram.percentile(99.0).map(|b| b.end()), Ok(99));
assert_eq!(histogram.percentile(99.9).map(|b| b.end()), Ok(100));
assert_eq!(histogram.percentile(25.0).map(|b| b.unwrap().end()), Ok(25));
assert_eq!(histogram.percentile(50.0).map(|b| b.unwrap().end()), Ok(50));
assert_eq!(histogram.percentile(75.0).map(|b| b.unwrap().end()), Ok(75));
assert_eq!(histogram.percentile(90.0).map(|b| b.unwrap().end()), Ok(90));
assert_eq!(histogram.percentile(99.0).map(|b| b.unwrap().end()), Ok(99));
assert_eq!(
histogram.percentile(99.9).map(|b| b.unwrap().end()),
Ok(100)
);

assert_eq!(histogram.percentile(-1.0), Err(Error::InvalidPercentile));
assert_eq!(histogram.percentile(101.0), Err(Error::InvalidPercentile));

let percentiles: Vec<(f64, u64)> = histogram
.percentiles(&[50.0, 90.0, 99.0, 99.9])
.unwrap()
.unwrap()
.iter()
.map(|(p, b)| (*p, b.end()))
.collect();
Expand All @@ -352,10 +368,10 @@ mod tests {
let _ = histogram.increment(1024);
assert_eq!(
histogram.percentile(99.9),
Ok(Bucket {
Ok(Some(Bucket {
count: 1,
range: 1024..=1031,
})
}))
);
}

Expand Down Expand Up @@ -395,7 +411,7 @@ mod tests {
let v = vals[((*p / 100.0 * (vals.len() as f64)) as usize) - 1];

// Value and relative error from full histogram
let vhist = histogram.percentile(*p).unwrap().end();
let vhist = histogram.percentile(*p).unwrap().unwrap().end();
let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64);
assert!(e < error);
}
Expand Down

0 comments on commit 696f958

Please sign in to comment.