Permalink
Browse files

Fixed Histogram: halved memory footprint, test boundary conditions, d…

…ropped unnecessary binary search.

These changes preserve/correct behavior of the current Histogram.

src/java/voldemort/store/stats/Histogram.java
- Halved memory footprint by dropping unnecessary "bounds" array
- Dropped unnecessary binary search, making insert O(1) rather than O(log(nBuckets))
- Improved documentation
- Made interface consistent for type of values inserted/got from histogram (i.e., all are 'long')

test/unit/voldemort/store/stats/HistogramTest.java
- Added tests for boundary conditions: -ive values are dropped on
  insert, "too large" values are bucketed in the final bucket on
  insert.

Minor fixes to RequestCounter calls to histogram to conform to 'long' interfaces.
  • Loading branch information...
1 parent 1ac1b32 commit 90aabc38992bec133a8325028272f1275d9dd1b0 @jayjwylie jayjwylie committed Nov 12, 2012
@@ -7,10 +7,15 @@
import voldemort.annotations.concurrency.Threadsafe;
/**
- * A class for computing percentiles based on a histogram. Values are bucketed
- * by a configurable bound (e.g., 0-1, 1-2, 2-3). When a value is inserted,
- * perform a binary search to find the correct bucket.
+ * A class for computing percentiles based on a simple histogram.
*
+ * The histogram starts at 0 and then has uniformly sized buckets. The number of
+ * buckets and width of each bucket is specified upon construction. Each bucket
+ * in the histogram "counts" the number of values inserted into the histogram
+ * that fall into the bucket's range.
+ *
+ * All interfaces for adding data to the histogram or querying the histogram for
+ * quantiles are synchronized to make this object threadsafe.
*
*/
@Threadsafe
@@ -19,7 +24,8 @@
private final int nBuckets;
private final int step;
private final int[] buckets;
- private final int[] bounds;
+ private final long upperBound;
+
private int size;
private long sum;
private static final Logger logger = Logger.getLogger(Histogram.class);
@@ -43,21 +49,13 @@ public Histogram(int nBuckets, int step, long resetIntervalMs) {
* Initialize an empty histogram
*
* @param nBuckets The number of buckets to use
- * @param step The size of each bucket
+ * @param step The size (width) of each bucket
*/
public Histogram(int nBuckets, int step) {
this.nBuckets = nBuckets;
this.step = step;
+ this.upperBound = step * nBuckets;
this.buckets = new int[nBuckets];
- this.bounds = new int[nBuckets];
- init();
- }
-
- protected void init() {
- int bound = 0;
- for(int i = 0; i < nBuckets; i++, bound += step) {
- bounds[i] = bound;
- }
reset();
}
@@ -73,37 +71,48 @@ public synchronized void reset() {
/**
* Insert a value into the right bucket of the histogram. If the value is
- * larger than any bound, insert into the last bucket
+ * larger than any bound, insert into the last bucket. If the value is less
+ * than zero, then ignore it.
*
* @param data The value to insert into the histogram
*/
public synchronized void insert(long data) {
resetIfNeeded();
- int index = findBucket(data);
- if(index == -1) {
- logger.error(data + " can't be bucketed, is invalid!");
+ long index = 0;
+ if(data >= this.upperBound) {
+ index = nBuckets - 1;
+ } else if(data < 0) {
+ logger.error(data + " can't be bucketed because it is negative!");
+ return;
+ } else {
+ index = data / step;
+ }
+ if(index >= nBuckets) {
+ // This is dead code. Defend against code changes in future.
+ logger.error(data + " can't be bucketed because index overflowed number of buckets!");
return;
}
- buckets[index]++;
+ buckets[(int) index]++;
sum += data;
size++;
}
/**
* Find the a value <em>n</em> such that the percentile falls within [
- * <em>n</em>, <em>n + step</em>)
+ * <em>n</em>, <em>n + step</em>). This method does a <em>LINEAR</em> prove
+ * of the histogram. I.e., this method is O(nBuckets).
*
* @param quantile The percentile to find
* @return Lower bound associated with the percentile
*/
- public synchronized int getQuantile(double quantile) {
+ public synchronized long getQuantile(double quantile) {
resetIfNeeded();
int total = 0;
for(int i = 0; i < nBuckets; i++) {
total += buckets[i];
double currQuantile = ((double) total) / ((double) size);
if(currQuantile >= quantile) {
- return bounds[i];
+ return i * step;
}
}
return 0;
@@ -123,39 +132,6 @@ public synchronized double getAverage() {
return (sum * 1.0) / size;
}
- private int findBucket(long needle) {
- long max = step * nBuckets;
- if(needle > max) {
- return nBuckets - 1;
- }
- int low = 0;
- int high = nBuckets - 1;
- while(low <= high) {
- int mid = (low + high) / 2;
- int cmp = compareToBucket(mid, needle);
- if(cmp == 0) {
- return mid;
- } else if(cmp > 0) {
- high = mid - 1;
- } else if(cmp < 0) {
- low = mid + 1;
- }
- }
- return -1;
- }
-
- private int compareToBucket(int bucket, long needle) {
- int low = bounds[bucket];
- int high = low + step;
- if(low <= needle && high > needle) {
- return 0;
- } else if(low > needle) {
- return 1;
- } else {
- return -1;
- }
- }
-
private void resetIfNeeded() {
if(resetIntervalMs > -1) {
if((System.currentTimeMillis() - lastResetTimeMs) >= this.resetIntervalMs) {
@@ -17,8 +17,8 @@
private final int durationMS;
private final Time time;
private final Histogram histogram;
- private volatile int q95LatencyMs;
- private volatile int q99LatencyMs;
+ private volatile long q95LatencyMs;
+ private volatile long q99LatencyMs;
private boolean useHistogram;
/**
@@ -233,11 +233,11 @@ public long getGetAllMaxCount() {
return getValidAccumulator().getAllMaxCount;
}
- public int getQ95LatencyMs() {
+ public long getQ95LatencyMs() {
return q95LatencyMs;
}
- public int getQ99LatencyMs() {
+ public long getQ99LatencyMs() {
return q99LatencyMs;
}
@@ -68,4 +68,27 @@ public void testResetHistogram() {
assertEquals(0, resetingHistogram.getQuantile(0.99));
assertEquals(0.0, resetingHistogram.getAverage(), 0.0);
}
+
+ @Test
+ public void testUpperBoundaryCondition() {
+ Histogram h = new Histogram(100, 1);
+ h.insert(98);
+ h.insert(99);
+ h.insert(100); // Should bucket with 99
+ h.insert(101); // Should bucket with 99
+
+ assertEquals(h.getQuantile(0.24), 98);
+ assertEquals(h.getQuantile(0.26), 99);
+ }
+
+ @Test
+ public void testLowerBoundaryCondition() {
+ Histogram h = new Histogram(100, 1);
+ h.insert(-1); // Should not be bucketed
+ h.insert(0);
+ h.insert(1);
+
+ assertEquals(h.getQuantile(0.49), 0);
+ assertEquals(h.getQuantile(0.51), 1);
+ }
}

0 comments on commit 90aabc3

Please sign in to comment.