Skip to content

Commit

Permalink
Standard deviation aggregator.
Browse files Browse the repository at this point in the history
Change-Id: I35aecd7433495fd7bfd99ebb2803416bae5fe0b1
Signed-off-by: Benoit Sigoure <tsuna@stumbleupon.com>
  • Loading branch information
deusaquilus authored and tsuna committed Feb 22, 2012
1 parent c110c8c commit 049e34b
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 1 deletion.
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ tsdb_DEPS = \
pkgdata_DATA = $(tsdb_DEPS) $(jar)

test_SRC := \
src/core/TestAggregators.java \
src/core/TestCompactionQueue.java \
src/core/TestTags.java \
src/stats/TestHistogram.java \
Expand Down
71 changes: 70 additions & 1 deletion src/core/Aggregators.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,19 @@ public final class Aggregators {
/** Aggregator that returns the average value of the data points. */
public static final Aggregator AVG = new Avg();

/** Aggregator that returns the Standard Deviation of the data points. */
public static final Aggregator DEV = new StdDev();

/** Maps an aggregator name to its instance. */
private static final HashMap<String, Aggregator> aggregators;

static {
  // The map is a final field, so it must be assigned exactly once.  The
  // initial-capacity hint matches the five aggregators registered below.
  aggregators = new HashMap<String, Aggregator>(5);
  aggregators.put("sum", SUM);
  aggregators.put("min", MIN);
  aggregators.put("max", MAX);
  aggregators.put("avg", AVG);
  aggregators.put("dev", DEV);
}

private Aggregators() {
Expand Down Expand Up @@ -178,7 +182,72 @@ public double runDouble(final Doubles values) {
/** Returns the literal string {@code "avg"} identifying this aggregator. */
public String toString() {
return "avg";
}
}

/**
 * Aggregator that computes the standard deviation of a series in one pass.
 * <p>
 * Values are consumed one at a time, so the series never needs to be held
 * in memory as a whole.  The running update follows the recurrence
 * described in a
 * <a href="http://www.johndcook.com/standard_deviation.html">paper by John
 * D. Cook</a>, which itself builds on a method from a 1962 paper by
 * B. P. Welford, also presented in Donald Knuth's Art of Computer
 * Programming, Vol 2, page 232, 3rd edition.
 * <p>
 * The accumulated sum of squared deviations is divided by the number of
 * values consumed.  A series without any value yields 0.
 */
private static final class StdDev implements Aggregator {

  /**
   * Computes the standard deviation of integer values.
   * @param values The values to consume; the iterator is drained.
   * @return The standard deviation, truncated to a {@code long} by the
   * cast, or 0 if there was no value to consume.
   */
  public long runLong(final Longs values) {
    long count = 0;                // Number of values consumed so far.
    double mean = 0;               // Running mean of the values seen.
    double squaredDeviations = 0;  // Running sum of squared deviations.

    while (values.hasNextValue()) {
      final double value = values.nextLongValue();
      count++;
      if (count == 1) {
        // The very first value only seeds the running mean.
        mean = value;
        continue;
      }
      final double delta = value - mean;
      final double updatedMean = mean + delta / count;
      squaredDeviations += delta * (value - updatedMean);
      mean = updatedMean;
    }

    if (count == 0) {
      return 0;
    }
    return (long) Math.sqrt(squaredDeviations / count);
  }

  /**
   * Computes the standard deviation of floating point values.
   * @param values The values to consume; the iterator is drained.
   * @return The standard deviation, or 0 if there was no value to consume.
   */
  public double runDouble(final Doubles values) {
    long count = 0;                // Number of values consumed so far.
    double mean = 0;               // Running mean of the values seen.
    double squaredDeviations = 0;  // Running sum of squared deviations.

    while (values.hasNextValue()) {
      final double value = values.nextDoubleValue();
      count++;
      if (count == 1) {
        // The very first value only seeds the running mean.
        mean = value;
        continue;
      }
      final double delta = value - mean;
      final double updatedMean = mean + delta / count;
      squaredDeviations += delta * (value - updatedMean);
      mean = updatedMean;
    }

    if (count == 0) {
      return 0;
    }
    return Math.sqrt(squaredDeviations / count);
  }

  /** Returns the literal string {@code "dev"} identifying this aggregator. */
  public String toString() {
    return "dev";
  }
}

}
98 changes: 98 additions & 0 deletions src/core/TestAggregators.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// This file is part of OpenTSDB.
// Copyright (C) 2012 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;

import java.util.Random;

import org.junit.Assert;
import org.junit.Test;

/** Unit tests for the aggregators looked up through {@code Aggregators}. */
public final class TestAggregators {

  /**
   * Source of the random test values.
   * The seed is printed to stdout, presumably so that a failing run can be
   * reproduced with the same values.
   */
  private static final Random random;
  static {
    final long seed = System.nanoTime();
    System.out.println("Random seed: " + seed);
    random = new Random(seed);
  }

  /**
   * Epsilon used to compare floating point values.
   * Instead of using a fixed epsilon to compare our numbers, we calculate
   * it as a fraction of the expected value (0.001, i.e. 0.1%).  We do things
   * this way because our numbers can be extremely large, and a static
   * precision may no longer work once the scale of the numbers changes.
   */
  private static final double EPSILON_PERCENTAGE = 0.001;

  /**
   * Array-backed sequence of values that can be fed to an aggregator either
   * as {@code Longs} or as {@code Doubles}.
   */
  private static final class Numbers implements Aggregator.Longs, Aggregator.Doubles {
    private final long[] numbers;
    /** Index of the next value to hand out. */
    private int i = 0;

    public Numbers(final long[] numbers) {
      this.numbers = numbers;
    }

    /** Returns true as long as at least one value is left to hand out. */
    public boolean hasNextValue() {
      // Every element, including the last one, must reach the aggregator.
      // Stopping at `i + 1 < length' would silently drop the final value
      // and make the aggregator work on a different data set than the
      // reference computation.
      return i < numbers.length;
    }

    public long nextLongValue() {
      return numbers[i++];
    }

    public double nextDoubleValue() {
      return numbers[i++];
    }

    /** Rewinds the sequence so that it can be consumed again. */
    void reset() {
      i = 0;
    }
  }

  /**
   * Checks the "dev" aggregator against a naive two-pass computation, on
   * 1000 random longs, through both the double and the long entry points.
   */
  @Test
  public void testStdDevRandomValues() {
    final long[] values = new long[1000];
    for (int i = 0; i < values.length; i++) {
      values[i] = random.nextLong();
    }
    final double expected = naiveStdDev(values);

    // Calculate the epsilon based on the percentage of the number.
    final double epsilon = EPSILON_PERCENTAGE * expected;
    final Numbers numbers = new Numbers(values);
    final Aggregator agg = Aggregators.get("dev");

    Assert.assertEquals(expected, agg.runDouble(numbers), epsilon);
    numbers.reset();
    // The long variant truncates its result, so allow a difference of at
    // least 1 regardless of the scale of the expected value.
    Assert.assertEquals(expected, agg.runLong(numbers), Math.max(epsilon, 1.0));
  }

  /**
   * Reference implementation: two-pass standard deviation that divides the
   * sum of squared deviations by the number of values.
   * @param values The values to compute the standard deviation of.
   * @return The standard deviation of {@code values}.
   */
  private static double naiveStdDev(long[] values) {
    double sum = 0;
    for (final double value : values) {
      sum += value;
    }
    final double mean = sum / values.length;

    double squaresum = 0;
    for (final double value : values) {
      squaresum += Math.pow(value - mean, 2);
    }
    final double variance = squaresum / values.length;
    return Math.sqrt(variance);
  }

}

0 comments on commit 049e34b

Please sign in to comment.