-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
aggregated.go
205 lines (181 loc) · 8.35 KB
/
aggregated.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
/*
Copyright 2017 Simon J Mudd
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package discovery
import (
"time"
"github.com/montanaflynn/stats"
"vitess.io/vitess/go/vt/vtorc/collection"
)
// AggregatedDiscoveryMetrics contains aggregated metrics for instance discovery.
// Called from api/discovery-metrics-aggregated?seconds=xxx
//
// The latency statistics below come in four flavors (Mean, Max, Median, P95),
// each computed over three measured durations per discovery (Total, Backend,
// Instance), for both the full sample set and the failed-only subset.
type AggregatedDiscoveryMetrics struct {
	FirstSeen                       time.Time // timestamp of the first data seen
	LastSeen                        time.Time // timestamp of the last data seen
	CountDistinctInstanceKeys       int       // number of distinct Instances seen (note: this may not be true: distinct = succeeded + failed)
	CountDistinctOkInstanceKeys     int       // number of distinct Instances which succeeded
	CountDistinctFailedInstanceKeys int       // number of distinct Instances which failed
	FailedDiscoveries               uint64    // number of failed discoveries
	SuccessfulDiscoveries           uint64    // number of successful discoveries
	InstancePollSecondsExceeded     uint64    // number of times discoverInstance exceeded InstancePollSeconds
	// Mean latencies (seconds) over all discoveries, and over failed ones only.
	MeanTotalSeconds          float64
	MeanBackendSeconds        float64
	MeanInstanceSeconds       float64
	FailedMeanTotalSeconds    float64
	FailedMeanBackendSeconds  float64
	FailedMeanInstanceSeconds float64
	// Maximum latencies (seconds) over all discoveries, and over failed ones only.
	MaxTotalSeconds          float64
	MaxBackendSeconds        float64
	MaxInstanceSeconds       float64
	FailedMaxTotalSeconds    float64
	FailedMaxBackendSeconds  float64
	FailedMaxInstanceSeconds float64
	// Median latencies (seconds) over all discoveries, and over failed ones only.
	MedianTotalSeconds          float64
	MedianBackendSeconds        float64
	MedianInstanceSeconds       float64
	FailedMedianTotalSeconds    float64
	FailedMedianBackendSeconds  float64
	FailedMedianInstanceSeconds float64
	// 95th-percentile latencies (seconds) over all discoveries, and over failed ones only.
	P95TotalSeconds          float64
	P95BackendSeconds        float64
	P95InstanceSeconds       float64
	FailedP95TotalSeconds    float64
	FailedP95BackendSeconds  float64
	FailedP95InstanceSeconds float64
}
// aggregate returns the aggregate values of the given metrics (assumed to be Metric)
func aggregate(results []collection.Metric) AggregatedDiscoveryMetrics {
if len(results) == 0 {
return AggregatedDiscoveryMetrics{}
}
var (
first time.Time
last time.Time
)
type counterKey string
type hostKey string
type timerKey string
const (
FailedDiscoveries counterKey = "FailedDiscoveries"
Discoveries = "Discoveries"
InstancePollSecondsExceeded = "instancePollSecondsExceeded"
InstanceKeys hostKey = "InstanceKeys"
OkInstanceKeys = "OkInstanceKeys"
FailedInstanceKeys = "FailedInstanceKeys"
TotalSeconds timerKey = "TotalSeconds"
BackendSeconds = "BackendSeconds"
InstanceSeconds = "InstanceSeconds"
FailedTotalSeconds = "FailedTotalSeconds"
FailedBackendSeconds = "FailedBackendSeconds"
FailedInstanceSeconds = "FailedInstanceSeconds"
)
counters := make(map[counterKey]uint64) // map of string based counters
names := make(map[hostKey]map[string]int) // map of string based names (using a map)
timings := make(map[timerKey]stats.Float64Data) // map of string based float64 values
// initialise counters
for _, v := range []counterKey{FailedDiscoveries, Discoveries, InstancePollSecondsExceeded} {
counters[v] = 0
}
// initialise names
for _, v := range []hostKey{InstanceKeys, FailedInstanceKeys, OkInstanceKeys} {
names[v] = make(map[string]int)
}
// initialise timers
for _, v := range []timerKey{TotalSeconds, BackendSeconds, InstanceSeconds, FailedTotalSeconds, FailedBackendSeconds, FailedInstanceSeconds} {
timings[v] = nil
}
// iterate over results storing required values
for _, v2 := range results {
v := v2.(*Metric) // convert to the right type
// first and last
if first.IsZero() || first.After(v.Timestamp) {
first = v.Timestamp
}
if last.Before(v.Timestamp) {
last = v.Timestamp
}
// different names
x := names[InstanceKeys]
x[v.InstanceKey.String()] = 1 // Value doesn't matter
names[InstanceKeys] = x
if v.Err == nil {
// ok names
x := names[OkInstanceKeys]
x[v.InstanceKey.String()] = 1 // Value doesn't matter
names[OkInstanceKeys] = x
} else {
// failed names
x := names[FailedInstanceKeys]
x[v.InstanceKey.String()] = 1 // Value doesn't matter
names[FailedInstanceKeys] = x
}
// discoveries
counters[Discoveries]++
if v.Err != nil {
counters[FailedDiscoveries]++
}
counters[InstancePollSecondsExceeded] += v.InstancePollSecondsDurationCount
// All timings
timings[TotalSeconds] = append(timings[TotalSeconds], v.TotalLatency.Seconds())
timings[BackendSeconds] = append(timings[BackendSeconds], v.BackendLatency.Seconds())
timings[InstanceSeconds] = append(timings[InstanceSeconds], v.InstanceLatency.Seconds())
// Failed timings
if v.Err != nil {
timings[FailedTotalSeconds] = append(timings[FailedTotalSeconds], v.TotalLatency.Seconds())
timings[FailedBackendSeconds] = append(timings[FailedBackendSeconds], v.BackendLatency.Seconds())
timings[FailedInstanceSeconds] = append(timings[FailedInstanceSeconds], v.InstanceLatency.Seconds())
}
}
return AggregatedDiscoveryMetrics{
FirstSeen: first,
LastSeen: last,
CountDistinctInstanceKeys: len(names[InstanceKeys]),
CountDistinctOkInstanceKeys: len(names[OkInstanceKeys]),
CountDistinctFailedInstanceKeys: len(names[FailedInstanceKeys]),
FailedDiscoveries: counters[FailedDiscoveries],
SuccessfulDiscoveries: counters[Discoveries],
InstancePollSecondsExceeded: counters[InstancePollSecondsExceeded],
MeanTotalSeconds: mean(timings[TotalSeconds]),
MeanBackendSeconds: mean(timings[BackendSeconds]),
MeanInstanceSeconds: mean(timings[InstanceSeconds]),
FailedMeanTotalSeconds: mean(timings[FailedTotalSeconds]),
FailedMeanBackendSeconds: mean(timings[FailedBackendSeconds]),
FailedMeanInstanceSeconds: mean(timings[FailedInstanceSeconds]),
MaxTotalSeconds: max(timings[TotalSeconds]),
MaxBackendSeconds: max(timings[BackendSeconds]),
MaxInstanceSeconds: max(timings[InstanceSeconds]),
FailedMaxTotalSeconds: max(timings[FailedTotalSeconds]),
FailedMaxBackendSeconds: max(timings[FailedBackendSeconds]),
FailedMaxInstanceSeconds: max(timings[FailedInstanceSeconds]),
MedianTotalSeconds: median(timings[TotalSeconds]),
MedianBackendSeconds: median(timings[BackendSeconds]),
MedianInstanceSeconds: median(timings[InstanceSeconds]),
FailedMedianTotalSeconds: median(timings[FailedTotalSeconds]),
FailedMedianBackendSeconds: median(timings[FailedBackendSeconds]),
FailedMedianInstanceSeconds: median(timings[FailedInstanceSeconds]),
P95TotalSeconds: percentile(timings[TotalSeconds], 95),
P95BackendSeconds: percentile(timings[BackendSeconds], 95),
P95InstanceSeconds: percentile(timings[InstanceSeconds], 95),
FailedP95TotalSeconds: percentile(timings[FailedTotalSeconds], 95),
FailedP95BackendSeconds: percentile(timings[FailedBackendSeconds], 95),
FailedP95InstanceSeconds: percentile(timings[FailedInstanceSeconds], 95),
}
}
// AggregatedSince returns a large number of aggregated metrics
// based on the raw metrics collected since the given time.
func AggregatedSince(c *collection.Collection, t time.Time) (AggregatedDiscoveryMetrics, error) {
results, err := c.Since(t)
if err != nil {
return AggregatedDiscoveryMetrics{}, err
}
return aggregate(results), nil
}