histogram/
standard.rs

1use crate::{Bucket, Config, Error, SparseHistogram};
2
3/// A histogram that uses plain 64bit counters for each bucket.
4#[derive(Clone, Debug, PartialEq, Eq)]
5#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
6pub struct Histogram {
7    pub(crate) config: Config,
8    pub(crate) buckets: Box<[u64]>,
9}
10
11impl Histogram {
12    /// Construct a new histogram from the provided parameters. See the
13    /// documentation for [`crate::Config`] to understand their meaning.
14    pub fn new(grouping_power: u8, max_value_power: u8) -> Result<Self, Error> {
15        let config = Config::new(grouping_power, max_value_power)?;
16
17        Ok(Self::with_config(&config))
18    }
19
20    /// Creates a new histogram using a provided [`crate::Config`].
21    pub fn with_config(config: &Config) -> Self {
22        let buckets: Box<[u64]> = vec![0; config.total_buckets()].into();
23
24        Self {
25            config: *config,
26            buckets,
27        }
28    }
29
30    /// Creates a new histogram using a provided [`crate::Config`] and the
31    /// provided collection of buckets.
32    pub fn from_buckets(
33        grouping_power: u8,
34        max_value_power: u8,
35        buckets: Vec<u64>,
36    ) -> Result<Self, Error> {
37        let config = Config::new(grouping_power, max_value_power)?;
38
39        if config.total_buckets() != buckets.len() {
40            return Err(Error::IncompatibleParameters);
41        }
42
43        Ok(Self {
44            config,
45            buckets: buckets.into(),
46        })
47    }
48
49    /// Increment the counter for the bucket corresponding to the provided value
50    /// by one.
51    pub fn increment(&mut self, value: u64) -> Result<(), Error> {
52        self.add(value, 1)
53    }
54
55    /// Add some count to the counter for the bucket corresponding to the
56    /// provided value
57    pub fn add(&mut self, value: u64, count: u64) -> Result<(), Error> {
58        let index = self.config.value_to_index(value)?;
59        self.buckets[index] = self.buckets[index].wrapping_add(count);
60        Ok(())
61    }
62
63    /// Get a reference to the raw counters.
64    pub fn as_slice(&self) -> &[u64] {
65        &self.buckets
66    }
67
68    /// Get a mutable reference to the raw counters.
69    pub fn as_mut_slice(&mut self) -> &mut [u64] {
70        &mut self.buckets
71    }
72
73    /// Return a collection of percentiles from this histogram.
74    ///
75    /// Each percentile should be in the inclusive range `0.0..=100.0`. For
76    /// example, the 50th percentile (median) can be found using `50.0`.
77    ///
78    /// The results will be sorted by the percentile.
79    pub fn percentiles(&self, percentiles: &[f64]) -> Result<Option<Vec<(f64, Bucket)>>, Error> {
80        // get the total count
81        let total_count: u128 = self.buckets.iter().map(|v| *v as u128).sum();
82
83        // sort the requested percentiles so we can find them in a single pass
84        let mut percentiles = percentiles.to_vec();
85        percentiles.sort_by(|a, b| a.partial_cmp(b).unwrap());
86
87        // validate all the percentiles
88        for percentile in &percentiles {
89            if !(0.0..=100.0).contains(percentile) {
90                return Err(Error::InvalidPercentile);
91            }
92        }
93
94        // empty histogram, no percentiles available
95        if total_count == 0 {
96            return Ok(None);
97        }
98
99        let mut bucket_idx = 0;
100        let mut partial_sum = self.buckets[bucket_idx] as u128;
101
102        let result: Vec<(f64, Bucket)> = percentiles
103            .iter()
104            .filter_map(|percentile| {
105                // For 0.0th percentile (min) we need to report the first bucket
106                // with a non-zero count.
107                let count =
108                    std::cmp::max(1, (percentile / 100.0 * total_count as f64).ceil() as u128);
109
110                loop {
111                    // found the matching bucket index for this percentile
112                    if partial_sum >= count {
113                        return Some((
114                            *percentile,
115                            Bucket {
116                                count: self.buckets[bucket_idx],
117                                range: self.config.index_to_range(bucket_idx),
118                            },
119                        ));
120                    }
121
122                    // check if we have reached the end of the buckets
123                    if bucket_idx == (self.buckets.len() - 1) {
124                        break;
125                    }
126
127                    // otherwise, increment the bucket index, partial sum, and loop
128                    bucket_idx += 1;
129                    partial_sum += self.buckets[bucket_idx] as u128;
130                }
131
132                None
133            })
134            .collect();
135
136        Ok(Some(result))
137    }
138
139    /// Return a single percentile from this histogram.
140    ///
141    /// The percentile should be in the inclusive range `0.0..=100.0`. For
142    /// example, the 50th percentile (median) can be found using `50.0`.
143    pub fn percentile(&self, percentile: f64) -> Result<Option<Bucket>, Error> {
144        self.percentiles(&[percentile])
145            .map(|v| v.map(|x| x.first().unwrap().1.clone()))
146    }
147
148    /// Returns a new histogram with a reduced grouping power. The reduced
149    /// grouping power should lie in the range (0..existing grouping power).
150    ///
151    /// The difference in grouping powers determines how much histogram size
152    /// is reduced by, with every step approximately halving the total
153    /// number of buckets (and hence total size of the histogram), while
154    /// doubling the relative error.
155    ///
156    /// This works by iterating over every bucket in the existing histogram
157    /// and inserting the contained values into the new histogram. While we
158    /// do not know the exact values of the data points (only that they lie
159    /// within the bucket's range), it does not matter since the bucket is
160    /// not split during downsampling and any value can be used.
161    pub fn downsample(&self, grouping_power: u8) -> Result<Histogram, Error> {
162        if grouping_power >= self.config.grouping_power() {
163            return Err(Error::MaxPowerTooLow);
164        }
165
166        let mut histogram = Histogram::new(grouping_power, self.config.max_value_power())?;
167        for (i, n) in self.as_slice().iter().enumerate() {
168            // Skip empty buckets
169            if *n != 0 {
170                let val = self.config.index_to_lower_bound(i);
171                histogram.add(val, *n)?;
172            }
173        }
174
175        Ok(histogram)
176    }
177
178    /// Adds the other histogram to this histogram and returns the result as a
179    /// new histogram.
180    ///
181    /// An error is returned if the two histograms have incompatible parameters
182    /// or if there is an overflow.
183    pub fn checked_add(&self, other: &Histogram) -> Result<Histogram, Error> {
184        if self.config != other.config {
185            return Err(Error::IncompatibleParameters);
186        }
187
188        let mut result = self.clone();
189
190        for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
191            *this = this.checked_add(*other).ok_or(Error::Overflow)?;
192        }
193
194        Ok(result)
195    }
196
197    /// Adds the other histogram to this histogram and returns the result as a
198    /// new histogram.
199    ///
200    /// An error is returned if the two histograms have incompatible parameters.
201    pub fn wrapping_add(&self, other: &Histogram) -> Result<Histogram, Error> {
202        if self.config != other.config {
203            return Err(Error::IncompatibleParameters);
204        }
205
206        let mut result = self.clone();
207
208        for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
209            *this = this.wrapping_add(*other);
210        }
211
212        Ok(result)
213    }
214
215    /// Subtracts the other histogram from this histogram and returns the result
216    /// as a new histogram.
217    ///
218    /// An error is returned if the two histograms have incompatible parameters
219    /// or if there is an overflow.
220    pub fn checked_sub(&self, other: &Histogram) -> Result<Histogram, Error> {
221        if self.config != other.config {
222            return Err(Error::IncompatibleParameters);
223        }
224
225        let mut result = self.clone();
226
227        for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
228            *this = this.checked_sub(*other).ok_or(Error::Overflow)?;
229        }
230
231        Ok(result)
232    }
233
234    /// Subtracts the other histogram from this histogram and returns the result
235    /// as a new histogram.
236    ///
237    /// An error is returned if the two histograms have incompatible parameters.
238    pub fn wrapping_sub(&self, other: &Histogram) -> Result<Histogram, Error> {
239        if self.config != other.config {
240            return Err(Error::IncompatibleParameters);
241        }
242
243        let mut result = self.clone();
244
245        for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) {
246            *this = this.wrapping_sub(*other);
247        }
248
249        Ok(result)
250    }
251
252    /// Returns an interator across the histogram.
253    pub fn iter(&self) -> Iter {
254        Iter {
255            index: 0,
256            histogram: self,
257        }
258    }
259
260    /// Returns the bucket configuration of the histogram.
261    pub fn config(&self) -> Config {
262        self.config
263    }
264}
265
266impl<'a> IntoIterator for &'a Histogram {
267    type Item = Bucket;
268    type IntoIter = Iter<'a>;
269
270    fn into_iter(self) -> Self::IntoIter {
271        Iter {
272            index: 0,
273            histogram: self,
274        }
275    }
276}
277
278/// An iterator across the histogram buckets.
279pub struct Iter<'a> {
280    index: usize,
281    histogram: &'a Histogram,
282}
283
284impl Iterator for Iter<'_> {
285    type Item = Bucket;
286
287    fn next(&mut self) -> Option<<Self as std::iter::Iterator>::Item> {
288        if self.index >= self.histogram.buckets.len() {
289            return None;
290        }
291
292        let bucket = Bucket {
293            count: self.histogram.buckets[self.index],
294            range: self.histogram.config.index_to_range(self.index),
295        };
296
297        self.index += 1;
298
299        Some(bucket)
300    }
301}
302
303impl From<&SparseHistogram> for Histogram {
304    fn from(other: &SparseHistogram) -> Self {
305        let mut histogram = Histogram::with_config(&other.config);
306
307        for (index, count) in other.index.iter().zip(other.count.iter()) {
308            histogram.buckets[*index] = *count;
309        }
310
311        histogram
312    }
313}
314
315#[cfg(test)]
316mod tests {
317    use super::*;
318    use rand::Rng;
319
320    #[cfg(target_pointer_width = "64")]
321    #[test]
322    fn size() {
323        assert_eq!(std::mem::size_of::<Histogram>(), 48);
324    }
325
326    #[test]
327    // Tests percentiles
328    fn percentiles() {
329        let mut histogram = Histogram::new(7, 64).unwrap();
330
331        assert_eq!(histogram.percentile(50.0).unwrap(), None);
332        assert_eq!(
333            histogram.percentiles(&[50.0, 90.0, 99.0, 99.9]).unwrap(),
334            None
335        );
336
337        for i in 0..=100 {
338            let _ = histogram.increment(i);
339            assert_eq!(
340                histogram.percentile(0.0),
341                Ok(Some(Bucket {
342                    count: 1,
343                    range: 0..=0,
344                }))
345            );
346            assert_eq!(
347                histogram.percentile(100.0),
348                Ok(Some(Bucket {
349                    count: 1,
350                    range: i..=i,
351                }))
352            );
353        }
354        assert_eq!(histogram.percentile(0.0).map(|b| b.unwrap().end()), Ok(0));
355        assert_eq!(histogram.percentile(25.0).map(|b| b.unwrap().end()), Ok(25));
356        assert_eq!(histogram.percentile(50.0).map(|b| b.unwrap().end()), Ok(50));
357        assert_eq!(histogram.percentile(75.0).map(|b| b.unwrap().end()), Ok(75));
358        assert_eq!(histogram.percentile(90.0).map(|b| b.unwrap().end()), Ok(90));
359        assert_eq!(histogram.percentile(99.0).map(|b| b.unwrap().end()), Ok(99));
360        assert_eq!(
361            histogram.percentile(99.9).map(|b| b.unwrap().end()),
362            Ok(100)
363        );
364
365        assert_eq!(histogram.percentile(-1.0), Err(Error::InvalidPercentile));
366        assert_eq!(histogram.percentile(101.0), Err(Error::InvalidPercentile));
367
368        let percentiles: Vec<(f64, u64)> = histogram
369            .percentiles(&[50.0, 90.0, 99.0, 99.9])
370            .unwrap()
371            .unwrap()
372            .iter()
373            .map(|(p, b)| (*p, b.end()))
374            .collect();
375
376        assert_eq!(
377            percentiles,
378            vec![(50.0, 50), (90.0, 90), (99.0, 99), (99.9, 100)]
379        );
380
381        let _ = histogram.increment(1024);
382        assert_eq!(
383            histogram.percentile(99.9),
384            Ok(Some(Bucket {
385                count: 1,
386                range: 1024..=1031,
387            }))
388        );
389    }
390
391    #[test]
392    // Tests percentile used to find min
393    fn min() {
394        let mut histogram = Histogram::new(7, 64).unwrap();
395
396        assert_eq!(histogram.percentile(0.0).unwrap(), None);
397
398        let _ = histogram.increment(10);
399        assert_eq!(histogram.percentile(0.0).map(|b| b.unwrap().end()), Ok(10));
400
401        let _ = histogram.increment(4);
402        assert_eq!(histogram.percentile(0.0).map(|b| b.unwrap().end()), Ok(4));
403    }
404
405    #[test]
406    #[ignore = "this test is flaky (see issue #100)"]
407    // Tests downsampling
408    fn downsample() {
409        let mut histogram = Histogram::new(8, 32).unwrap();
410        let mut vals: Vec<u64> = Vec::with_capacity(10000);
411        let mut rng = rand::thread_rng();
412
413        // Generate 10,000 values to store in a sorted array and a histogram
414        for _ in 0..vals.capacity() {
415            let v: u64 = rng.gen_range(1..2_u64.pow(histogram.config.max_value_power() as u32));
416            vals.push(v);
417            let _ = histogram.increment(v);
418        }
419        vals.sort();
420
421        // List of percentiles to query and validate
422        let mut percentiles: Vec<f64> = Vec::with_capacity(109);
423        for i in 20..99 {
424            percentiles.push(i as f64);
425        }
426        let mut tail = vec![
427            99.1, 99.2, 99.3, 99.4, 99.5, 99.6, 99.7, 99.8, 99.9, 99.99, 100.0,
428        ];
429        percentiles.append(&mut tail);
430
431        // Downsample and check the percentiles lie within error margin
432        let h = histogram.clone();
433        let grouping_power = histogram.config.grouping_power();
434        for factor in 1..grouping_power {
435            let error = histogram.config.error();
436
437            for p in &percentiles {
438                let v = vals[((*p / 100.0 * (vals.len() as f64)) as usize) - 1];
439
440                // Value and relative error from full histogram
441                let vhist = histogram.percentile(*p).unwrap().unwrap().end();
442                let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64);
443                assert!(e < error);
444            }
445
446            histogram = h.downsample(grouping_power - factor).unwrap();
447        }
448    }
449
450    // Return four histograms (three with identical configs and one with a
451    // different config) for testing add and subtract. One of the histograms
452    // should be populated with the maximum u64 value to cause overflows.
453    fn build_histograms() -> (Histogram, Histogram, Histogram, Histogram) {
454        let mut h1 = Histogram::new(1, 3).unwrap();
455        let mut h2 = Histogram::new(1, 3).unwrap();
456        let mut h3 = Histogram::new(1, 3).unwrap();
457        let h4 = Histogram::new(7, 32).unwrap();
458
459        for i in 0..h1.config().total_buckets() {
460            h1.as_mut_slice()[i] = 1;
461            h2.as_mut_slice()[i] = 1;
462            h3.as_mut_slice()[i] = u64::MAX;
463        }
464
465        (h1, h2, h3, h4)
466    }
467
468    #[test]
469    // Tests checked add
470    fn checked_add() {
471        let (h, h_good, h_overflow, h_mismatch) = build_histograms();
472
473        assert_eq!(
474            h.checked_add(&h_mismatch),
475            Err(Error::IncompatibleParameters)
476        );
477
478        let r = h.checked_add(&h_good).unwrap();
479        assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
480
481        assert_eq!(h.checked_add(&h_overflow), Err(Error::Overflow));
482    }
483
484    #[test]
485    // Tests wrapping add
486    fn wrapping_add() {
487        let (h, h_good, h_overflow, h_mismatch) = build_histograms();
488
489        assert_eq!(
490            h.wrapping_add(&h_mismatch),
491            Err(Error::IncompatibleParameters)
492        );
493
494        let r = h.wrapping_add(&h_good).unwrap();
495        assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
496
497        let r = h.wrapping_add(&h_overflow).unwrap();
498        assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
499    }
500
501    #[test]
502    // Tests checked sub
503    fn checked_sub() {
504        let (h, h_good, h_overflow, h_mismatch) = build_histograms();
505
506        assert_eq!(
507            h.checked_sub(&h_mismatch),
508            Err(Error::IncompatibleParameters)
509        );
510
511        let r = h.checked_sub(&h_good).unwrap();
512        assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
513
514        assert_eq!(h.checked_add(&h_overflow), Err(Error::Overflow));
515    }
516
517    #[test]
518    // Tests wrapping sub
519    fn wrapping_sub() {
520        let (h, h_good, h_overflow, h_mismatch) = build_histograms();
521
522        assert_eq!(
523            h.wrapping_sub(&h_mismatch),
524            Err(Error::IncompatibleParameters)
525        );
526
527        let r = h.wrapping_sub(&h_good).unwrap();
528        assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]);
529
530        let r = h.wrapping_sub(&h_overflow).unwrap();
531        assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]);
532    }
533
534    #[test]
535    // Test creating the histogram from buckets
536    fn from_buckets() {
537        let mut histogram = Histogram::new(8, 32).unwrap();
538        for i in 0..=100 {
539            let _ = histogram.increment(i);
540        }
541
542        let buckets = histogram.as_slice();
543        let constructed = Histogram::from_buckets(8, 32, buckets.to_vec()).unwrap();
544
545        assert!(constructed == histogram);
546    }
547}
histogram/standard.rs

histogram/
standard.rs