Skip to content

Commit

Permalink
Use sample method to compute variance to improve performance and benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
zix99 committed Nov 29, 2019
1 parent 0f5c88f commit 3f7659a
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 30 deletions.
21 changes: 11 additions & 10 deletions cmd/analyze.go
Expand Up @@ -19,22 +19,22 @@ func humanf(arg interface{}) string {
// writeAggrOutput renders the statistics held by aggr onto writer, one
// statistic per terminal line, and returns the number of lines it occupied.
//
// Samples, Mean, StdDev, Min and Max always occupy lines 0-4. When extra is
// true, line 5 is left blank as a separator, Median and Mode follow on lines
// 6 and 7, and each requested quantile gets its own line starting at 8. Each
// quantile value q is divided by 100 before being passed to Quantile.
func writeAggrOutput(writer *multiterm.TermWriter, aggr *aggregation.MatchNumerical, extra bool, quantiles []float64) int {
	writer.WriteForLine(0, "Samples: %v", color.Wrap(color.BrightWhite, humanize.Hi(aggr.Count())))
	writer.WriteForLine(1, "Mean: %v", humanf(aggr.Mean()))
	writer.WriteForLine(2, "StdDev: %v", humanf(aggr.StdDev()))
	writer.WriteForLine(3, "Min: %v", humanf(aggr.Min()))
	writer.WriteForLine(4, "Max: %v", humanf(aggr.Max()))

	if !extra {
		// Only the five basic lines (0-4) were written.
		return 5
	}

	// Blank separator between the basic and the extended statistics.
	writer.WriteForLine(5, "")

	data := aggr.Analyze()
	writer.WriteForLine(6, "Median: %v", humanf(data.Median()))
	writer.WriteForLine(7, "Mode: %v", humanf(data.Mode()))
	for idx, q := range quantiles {
		writer.WriteForLine(8+idx, "P%02.4f: %v", q, humanf(data.Quantile(q/100.0)))
	}
	return 8 + len(quantiles)
}

Expand All @@ -51,11 +51,12 @@ func parseStringSet(vals []string) []float64 {
}

func analyzeFunction(c *cli.Context) error {
config := aggregation.NumericalConfig{
Reverse: c.Bool("reverse"),
}
extra := c.Bool("extra")
quantiles := parseStringSet(c.StringSlice("quantile"))
config := aggregation.NumericalConfig{
Reverse: c.Bool("reverse"),
KeepValuesForAnalysis: extra,
}

aggr := aggregation.NewNumericalAggregator(&config)
writer := multiterm.New()
Expand Down
43 changes: 26 additions & 17 deletions pkg/aggregation/numerical.go
Expand Up @@ -11,17 +11,22 @@ type StatisticalAnalysis struct {
}

// NumericalConfig holds the options for a numerical aggregator.
type NumericalConfig struct {
	// Reverse sorts values in reverse order when they are sorted for analysis.
	Reverse bool
	// KeepValuesForAnalysis retains every sampled value so that further
	// numerical analysis (mode, quantiles, etc) can be performed on them.
	KeepValuesForAnalysis bool
}

// MatchNumerical accumulates running statistics over a stream of numeric
// samples.
type MatchNumerical struct {
	samples uint64  // number of values recorded so far
	sum     float64 // running total of the recorded values
	mean    float64 // running mean, updated incrementally for each sample
	// variance is the running sum of squared deviations from the mean;
	// Variance divides it by samples-1 to obtain the sample variance.
	variance float64
	min      float64 // lowered whenever a smaller sample arrives
	max      float64 // value returned by Max
	// NOTE(review): presumably counts inputs that could not be parsed as
	// numbers; confirm against the code that increments it.
	parseErrors uint64

	// values is all the samples (only populated when
	// config.KeepValuesForAnalysis is on).
	values []float64

	config *NumericalConfig
}

func NewNumericalAggregator(config *NumericalConfig) *MatchNumerical {
Expand All @@ -35,8 +40,14 @@ func NewNumericalAggregator(config *NumericalConfig) *MatchNumerical {

func (s *MatchNumerical) Samplef(val float64) {
s.samples++
s.sum += val
s.values = append(s.values, val)

oldMean := s.mean
s.mean += (val - oldMean) / float64(s.samples)
s.variance += (val - oldMean) * (val - s.mean)

if s.config.KeepValuesForAnalysis {
s.values = append(s.values, val)
}

if val < s.min {
s.min = val
Expand Down Expand Up @@ -71,21 +82,19 @@ func (s *MatchNumerical) Max() float64 {
return s.max
}

// Variance reports the sample variance of the values recorded so far: the
// accumulated sum of squared deviations divided by samples-1. With fewer than
// two samples there is no spread to measure, so it reports 0.
func (s *MatchNumerical) Variance() float64 {
	if s.samples <= 1 {
		return 0.0
	}
	degreesOfFreedom := float64(s.samples - 1)
	return s.variance / degreesOfFreedom
}

// Mean returns the running arithmetic mean that Samplef updates
// incrementally with every recorded value. Reading the stored mean, rather
// than dividing a sum by the sample count, avoids evaluating 0/0 (NaN) when
// no samples have been recorded.
func (s *MatchNumerical) Mean() float64 {
	return s.mean
}

// StdDev returns the sample standard deviation, i.e. the square root of
// Variance. It relies only on the running accumulators, so it does not
// depend on the individual values being retained (they are only kept when
// KeepValuesForAnalysis is enabled). With fewer than two samples Variance is
// 0, and so is the result.
func (s *MatchNumerical) StdDev() float64 {
	return math.Sqrt(s.Variance())
}

func (s *MatchNumerical) Analyze() *StatisticalAnalysis {
Expand Down
10 changes: 7 additions & 3 deletions pkg/aggregation/numerical_test.go
Expand Up @@ -7,7 +7,9 @@ import (
)

func TestSimpleNumericalAggregation(t *testing.T) {
aggr := NewNumericalAggregator(&NumericalConfig{})
aggr := NewNumericalAggregator(&NumericalConfig{
KeepValuesForAnalysis: true,
})
aggr.Samplef(5)
aggr.Samplef(10)
aggr.Samplef(15)
Expand All @@ -16,7 +18,7 @@ func TestSimpleNumericalAggregation(t *testing.T) {
assert.Equal(t, 10.0, aggr.Mean())
assert.Equal(t, 5.0, aggr.Min())
assert.Equal(t, 15.0, aggr.Max())
assert.InEpsilon(t, 4.08248, aggr.StdDev(), 0.001)
assert.InEpsilon(t, 5.0, aggr.StdDev(), 0.001)

data := aggr.Analyze()

Expand All @@ -26,7 +28,9 @@ func TestSimpleNumericalAggregation(t *testing.T) {
}

func TestSimpleMode(t *testing.T) {
aggr := NewNumericalAggregator(&NumericalConfig{})
aggr := NewNumericalAggregator(&NumericalConfig{
KeepValuesForAnalysis: true,
})
aggr.Samplef(5)
aggr.Samplef(10)
aggr.Samplef(15)
Expand Down

0 comments on commit 3f7659a

Please sign in to comment.