
Commit

Merge 669db4b into 7a79e56
argouber committed Oct 1, 2019
2 parents 7a79e56 + 669db4b commit b83bda9
Showing 13 changed files with 109 additions and 43 deletions.
4 changes: 3 additions & 1 deletion runtime/client_http_response.go
@@ -172,7 +172,9 @@ func (res *ClientHTTPResponse) finish() {
logFn := res.req.ContextLogger.Info

// emit metrics
res.req.metrics.RecordHistogramDuration(res.req.ctx, clientLatency, res.finishTime.Sub(res.req.startTime))
delta := res.finishTime.Sub(res.req.startTime)
res.req.metrics.RecordTimer(res.req.ctx, clientLatency, delta)
res.req.metrics.RecordHistogramDuration(res.req.ctx, clientLatencyHist, delta)
_, known := knownStatusCodes[res.StatusCode]
if !known {
res.req.Logger.Error(
2 changes: 2 additions & 0 deletions runtime/constants.go
@@ -28,6 +28,7 @@ const (
endpointStatus = "endpoint.status"
endpointSystemErrors = "endpoint.system-errors"
endpointLatency = "endpoint.latency"
endpointLatencyHist = "endpoint.latency-hist"

// MetricEndpointPanics is endpoint level panic counter
MetricEndpointPanics = "endpoint.panic"
@@ -44,6 +45,7 @@ const (
clientAppErrors = "client.app-errors"
clientSystemErrors = "client.system-errors"
clientLatency = "client.latency"
clientLatencyHist = "client.latency-hist"
)

var knownMetrics = []string{
8 changes: 7 additions & 1 deletion runtime/context.go
@@ -303,6 +303,7 @@ type Logger interface {
// ContextMetrics emit metrics with tags extracted from context.
type ContextMetrics interface {
IncCounter(ctx context.Context, name string, value int64)
RecordTimer(ctx context.Context, name string, d time.Duration)
RecordHistogramDuration(ctx context.Context, name string, d time.Duration)
}

@@ -322,7 +323,12 @@ func (c *contextMetrics) IncCounter(ctx context.Context, name string, value int64) {
c.scope.Tagged(GetScopeTagsFromCtx(ctx)).Counter(name).Inc(value)
}

// RecordHistogramDuration records the duration with current tags from context
// RecordTimer records the duration with current tags from context
func (c *contextMetrics) RecordTimer(ctx context.Context, name string, d time.Duration) {
c.scope.Tagged(GetScopeTagsFromCtx(ctx)).Timer(name).Record(d)
}

// RecordHistogramDuration records the duration with current tags from context in a histogram
func (c *contextMetrics) RecordHistogramDuration(ctx context.Context, name string, d time.Duration) {
c.scope.Tagged(GetScopeTagsFromCtx(ctx)).Histogram(name, tally.DefaultBuckets).RecordDuration(d)
}
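
This ContextMetrics change is the core of the commit: every latency is now recorded twice, as a plain Timer under the existing metric name and as a bucketed Histogram under a parallel "-hist"/"Hist" name. The test comments later in this diff describe the duplicated histograms as "gratuitous" and to be removed later, so the dual emission looks like a migration step rather than the end state. A condensed, self-contained sketch of the pattern (illustrative only, not code from this commit; the per-request tag extraction via GetScopeTagsFromCtx is elided and a tally test scope stands in for the real metrics backend):

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/uber-go/tally"
)

// contextMetrics is a cut-down version of the type in runtime/context.go;
// GetScopeTagsFromCtx is omitted, so the scope is used untagged.
type contextMetrics struct {
	scope tally.Scope
}

func (c *contextMetrics) RecordTimer(ctx context.Context, name string, d time.Duration) {
	c.scope.Timer(name).Record(d)
}

func (c *contextMetrics) RecordHistogramDuration(ctx context.Context, name string, d time.Duration) {
	c.scope.Histogram(name, tally.DefaultBuckets).RecordDuration(d)
}

func main() {
	scope := tally.NewTestScope("zanzibar", nil)
	m := &contextMetrics{scope: scope}

	start := time.Now()
	time.Sleep(2 * time.Millisecond) // stand-in for the downstream call
	delta := time.Since(start)

	ctx := context.Background()
	// Convention introduced by this commit: plain name -> timer,
	// "-hist" suffix -> bucketed histogram.
	m.RecordTimer(ctx, "client.latency", delta)
	m.RecordHistogramDuration(ctx, "client.latency-hist", delta)

	snap := scope.Snapshot()
	fmt.Println(len(snap.Timers()), len(snap.Histograms())) // 1 1
}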
4 changes: 3 additions & 1 deletion runtime/grpc_client.go
@@ -124,7 +124,9 @@ func (c *callHelper) Start() {
// This method emits latency and error metric as well as logging in case of error.
func (c *callHelper) Finish(ctx context.Context, err error) context.Context {
c.finishTime = time.Now()
c.metrics.RecordHistogramDuration(ctx, clientLatency, c.finishTime.Sub(c.startTime))
delta := c.finishTime.Sub(c.startTime)
c.metrics.RecordTimer(ctx, clientLatency, delta)
c.metrics.RecordHistogramDuration(ctx, clientLatency, delta)
fields := []zapcore.Field{
zap.Time(logFieldRequestStartTime, c.startTime),
zap.Time(logFieldRequestFinishedTime, c.finishTime),
14 changes: 12 additions & 2 deletions runtime/plugins/m3_aggregator.go
@@ -45,7 +45,9 @@ type M3Collector struct {
fallbackSuccessesPrefix string
fallbackFailuresPrefix string
totalDurationPrefix string
totalDurationHistPrefix string
runDurationPrefix string
runDurationHistPrefix string
}

// M3CollectorClient provides configuration that the m3 client will need.
@@ -81,7 +83,9 @@ func (m *M3CollectorClient) NewM3Collector(name string) metricCollector.MetricCollector {
fallbackSuccessesPrefix: "circuitbreaker.fallbackSuccesses",
fallbackFailuresPrefix: "circuitbreaker.fallbackFailures",
totalDurationPrefix: "circuitbreaker.totalDuration",
totalDurationHistPrefix: "circuitbreaker.totalDurationHist",
runDurationPrefix: "circuitbreaker.runDuration",
runDurationHistPrefix: "circuitbreaker.runDurationHist",
}
}

@@ -92,6 +96,10 @@ func (g *M3Collector) incrementCounterMetric(prefix string, i float64) {
g.scope.Counter(prefix).Inc(int64(i))
}

func (g *M3Collector) updateTimerMetric(prefix string, dur time.Duration) {
g.scope.Timer(prefix).Record(dur)
}

func (g *M3Collector) updateHistogramMetric(prefix string, dur time.Duration) {
g.scope.Histogram(prefix, tally.DefaultBuckets).RecordDuration(dur)
}
@@ -107,8 +115,10 @@ func (g *M3Collector) Update(r metricCollector.MetricResult) {
g.incrementCounterMetric(g.timeoutsPrefix, r.Timeouts)
g.incrementCounterMetric(g.fallbackSuccessesPrefix, r.FallbackSuccesses)
g.incrementCounterMetric(g.fallbackFailuresPrefix, r.FallbackFailures)
g.updateHistogramMetric(g.totalDurationPrefix, r.TotalDuration)
g.updateHistogramMetric(g.runDurationPrefix, r.RunDuration)
g.updateTimerMetric(g.totalDurationPrefix, r.TotalDuration)
g.updateHistogramMetric(g.totalDurationHistPrefix, r.TotalDuration)
g.updateTimerMetric(g.runDurationPrefix, r.RunDuration)
g.updateHistogramMetric(g.runDurationHistPrefix, r.RunDuration)
}

// Reset is a noop operation in this collector.
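
The circuit-breaker collector above applies the same convention to hystrix durations. A rough, self-contained sketch of how such a collector plugs into hystrix-go follows; the metricCollector.Registry.Register call and the tally test scope are assumptions based on the afex/hystrix-go and uber-go/tally APIs and are not part of this diff, and the metric names are hard-coded here only to keep the sketch short (the real plugin reads them from the prefix fields on M3Collector).

package main

import (
	metricCollector "github.com/afex/hystrix-go/hystrix/metric_collector"
	"github.com/uber-go/tally"
)

// durationCollector is a stripped-down analogue of M3Collector: each
// circuit-breaker duration is emitted twice, as a timer under the plain name
// and as a bucketed histogram under the "...Hist" name.
type durationCollector struct {
	scope tally.Scope
}

func (c *durationCollector) Update(r metricCollector.MetricResult) {
	c.scope.Timer("circuitbreaker.totalDuration").Record(r.TotalDuration)
	c.scope.Histogram("circuitbreaker.totalDurationHist", tally.DefaultBuckets).RecordDuration(r.TotalDuration)
	c.scope.Timer("circuitbreaker.runDuration").Record(r.RunDuration)
	c.scope.Histogram("circuitbreaker.runDurationHist", tally.DefaultBuckets).RecordDuration(r.RunDuration)
}

// Reset is a no-op, as in the collector in this commit.
func (c *durationCollector) Reset() {}

func main() {
	scope := tally.NewTestScope("", nil)
	// Register a factory so every hystrix circuit gets its own tagged collector.
	metricCollector.Registry.Register(func(name string) metricCollector.MetricCollector {
		return &durationCollector{scope: scope.Tagged(map[string]string{"circuit": name})}
	})
}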
11 changes: 7 additions & 4 deletions runtime/runtime_metrics.go
@@ -65,7 +65,8 @@ type runtimeMetrics struct {
// number of completed GC cycles
numGC tally.Counter
// GC pause time
gcPauseMs tally.Histogram
gcPauseMs tally.Timer
gcPauseMsHist tally.Histogram
}

// runtimeCollector keeps the current state of runtime metrics
@@ -122,8 +123,9 @@ func NewRuntimeMetricsCollector(
stackInuse: scope.Gauge("memory.stack"),

// GC
numGC: scope.Counter("memory.num-gc"),
gcPauseMs: scope.Histogram("memory.gc-pause-ms", tally.DefaultBuckets),
numGC: scope.Counter("memory.num-gc"),
gcPauseMs: scope.Timer("memory.gc-pause-ms"),
gcPauseMsHist: scope.Histogram("memory.gc-pause-ms-hist", tally.DefaultBuckets),
},
running: false,
stop: make(chan struct{}),
@@ -217,7 +219,8 @@ func (r *runtimeCollector) collectGCMetrics(memStats *runtime.MemStats) {

for i := lastNum; i != num; i++ {
pause := memStats.PauseNs[i%uint32(len(memStats.PauseNs))]
r.metrics.gcPauseMs.RecordDuration(time.Duration(pause))
r.metrics.gcPauseMs.Record(time.Duration(pause))
r.metrics.gcPauseMsHist.RecordDuration(time.Duration(pause))
}
}

4 changes: 3 additions & 1 deletion runtime/server_http_response.go
@@ -94,7 +94,9 @@ func (res *ServerHTTPResponse) finish(ctx context.Context) {
// no need to put this tag on the context because this is the end of response life cycle
statusTag := map[string]string{scopeTagStatus: fmt.Sprintf("%d", res.StatusCode)}
tagged := res.scope.Tagged(statusTag)
tagged.Histogram(endpointLatency, tally.DefaultBuckets).RecordDuration(res.finishTime.Sub(res.Request.startTime))
delta := res.finishTime.Sub(res.Request.startTime)
tagged.Timer(endpointLatency).Record(delta)
tagged.Histogram(endpointLatencyHist, tally.DefaultBuckets).RecordDuration(delta)
if !known {
res.logger.Error(
"Unknown status code",
4 changes: 3 additions & 1 deletion runtime/tchannel_inbound_call.go
@@ -66,7 +66,9 @@ func (c *tchannelInboundCall) finish(ctx context.Context, err error) {
} else {
c.scope.Counter(endpointSuccess).Inc(1)
}
c.scope.Histogram(endpointLatency, tally.DefaultBuckets).RecordDuration(c.finishTime.Sub(c.startTime))
delta := c.finishTime.Sub(c.startTime)
c.scope.Timer(endpointLatency).Record(delta)
c.scope.Histogram(endpointLatencyHist, tally.DefaultBuckets).RecordDuration(delta)
c.scope.Counter(endpointRequest).Inc(1)

fields := c.logFields(ctx)
4 changes: 3 additions & 1 deletion runtime/tchannel_outbound_call.go
@@ -64,7 +64,9 @@ func (c *tchannelOutboundCall) finish(ctx context.Context, err error) {
} else {
c.metrics.IncCounter(ctx, clientSuccess, 1)
}
c.metrics.RecordHistogramDuration(ctx, clientLatency, c.finishTime.Sub(c.startTime))
delta := c.finishTime.Sub(c.startTime)
c.metrics.RecordTimer(ctx, clientLatency, delta)
c.metrics.RecordHistogramDuration(ctx, clientLatencyHist, delta)

// write logs
fields := c.logFields(ctx)
32 changes: 18 additions & 14 deletions test/endpoints/bar/bar_metrics_test.go
@@ -89,13 +89,13 @@ func TestCallMetrics(t *testing.T) {
cbKeys = append(cbKeys, key)
}
}
assert.Equal(t, 4, len(cbKeys))
assert.Equal(t, 6, len(cbKeys)) // number off because of gratuitous histogram metric
for key := range metrics {
if strings.HasPrefix(key, "jaeger") {
delete(metrics, key)
}
}
assert.Equal(t, numMetrics, len(metrics))
assert.Equal(t, numMetrics+4, len(metrics))

endpointTags := map[string]string{
"env": "test",
@@ -123,19 +123,17 @@ func TestCallMetrics(t *testing.T) {
}
key := tally.KeyForPrefixedStringMap("endpoint.request", endpointTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
key = tally.KeyForPrefixedStringMap("endpoint.latency", histogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)

inboundLatency := metrics[tally.KeyForPrefixedStringMap(
"endpoint.latency", histogramTags,
)]
assert.Equal(t, int64(1), *inboundLatency.MetricValue.Count.I64Value)
key = tally.KeyForPrefixedStringMap("endpoint.latency", statusTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
value := *metrics[key].MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected latency > 1000 nano seconds")
assert.True(t, value < 10*1000*1000, "expected latency < 10 milli seconds")

inboundRecvd := metrics[tally.KeyForPrefixedStringMap(
"endpoint.request", endpointTags,
)]
value := *inboundRecvd.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value)
key = tally.KeyForPrefixedStringMap("endpoint.latency-hist", histogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)

inboundStatus := metrics[tally.KeyForPrefixedStringMap(
"endpoint.status", statusTags,
@@ -179,7 +177,13 @@ func TestCallMetrics(t *testing.T) {
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value, "expected counter to be 1")

key = tally.KeyForPrefixedStringMap("client.latency", cHistogramTags)
key = tally.KeyForPrefixedStringMap("client.latency", httpClientTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
value = *metrics[key].MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected latency > 1000 nano second")
assert.True(t, value < 10*1000*1000, "expected latency < 10 milli second")

key = tally.KeyForPrefixedStringMap("client.latency-hist", cHistogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)

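
The reworked assertions above capture how the two metric types surface differently: the timer exposes the recorded duration (MetricValue.Timer.I64Value) and can be range-checked, while the "-hist" histogram only exposes per-bucket counts. A small stand-alone test against a plain tally test scope shows the same split using the key helper these tests already rely on (an illustration only, not part of the repository's test harness; the snapshot key format is assumed to match tally.KeyForPrefixedStringMap):

package metrics

import (
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/uber-go/tally"
)

func TestLatencyEmittedAsTimerAndHistogram(t *testing.T) {
	scope := tally.NewTestScope("", nil)

	delta := 2 * time.Millisecond
	scope.Timer("endpoint.latency").Record(delta)
	scope.Histogram("endpoint.latency-hist", tally.DefaultBuckets).RecordDuration(delta)

	snap := scope.Snapshot()

	// Timer: the raw duration is available and can be bounded.
	timerKey := tally.KeyForPrefixedStringMap("endpoint.latency", nil)
	assert.Contains(t, snap.Timers(), timerKey, "expected metric: %s", timerKey)
	value := snap.Timers()[timerKey].Values()[0]
	assert.True(t, value > 1000*time.Nanosecond, "expected latency > 1000 nano seconds")
	assert.True(t, value < 10*time.Millisecond, "expected latency < 10 milli seconds")

	// Histogram: only bucket counts are available, so assert on the count.
	histKey := tally.KeyForPrefixedStringMap("endpoint.latency-hist", nil)
	assert.Contains(t, snap.Histograms(), histKey, "expected metric: %s", histKey)
	var count int64
	for _, c := range snap.Histograms()[histKey].Durations() {
		count += c
	}
	assert.Equal(t, int64(1), count, "expected one sample across the histogram buckets")
}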
23 changes: 18 additions & 5 deletions test/endpoints/baz/baz_metrics_test.go
@@ -99,14 +99,14 @@ func TestCallMetrics(t *testing.T) {
cbKeys = append(cbKeys, key)
}
}
assert.Equal(t, 4, len(cbKeys))
assert.Equal(t, 6, len(cbKeys)) // number off because of the histogram
// we don't care about jaeger emitted metrics
for key := range metrics {
if strings.HasPrefix(key, "jaeger") {
delete(metrics, key)
}
}
assert.Equal(t, numMetrics, len(metrics))
assert.Equal(t, numMetrics+4, len(metrics)) // magic number here because there are histogram entries

endpointTags := map[string]string{
"env": "test",
@@ -138,10 +138,17 @@ func TestCallMetrics(t *testing.T) {
value := *recvdMetric.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value, "expected counter to be 1")

key = tally.KeyForPrefixedStringMap("endpoint.latency", histogramTags)
key = tally.KeyForPrefixedStringMap("endpoint.latency", statusTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
latencyMetric := metrics[key]
assert.Equal(t, int64(1), *latencyMetric.MetricValue.Count.I64Value)
value = *latencyMetric.MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 10*1000*1000, "expected timer to be < 10 milli seconds")

key = tally.KeyForPrefixedStringMap("endpoint.latency-hist", histogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
latencyHistMetric := metrics[key]
assert.Equal(t, int64(1), *latencyHistMetric.MetricValue.Count.I64Value)

statusMetric := metrics[tally.KeyForPrefixedStringMap(
"endpoint.status", statusTags,
@@ -200,14 +207,20 @@ func TestCallMetrics(t *testing.T) {
assert.Equal(t, int64(1), *value.MetricValue.Count.I64Value, "expected counter to be 1")
}

key = tally.KeyForPrefixedStringMap("client.latency", clientTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
value = *metrics[key].MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 10*1000*1000, "expected timer to be < 10 milli seconds")

cHistogramTags := map[string]string{
m3.DefaultHistogramBucketName: "-infinity-10ms",
m3.DefaultHistogramBucketIDName: "0000",
}
for k, v := range clientTags {
cHistogramTags[k] = v
}
key = tally.KeyForPrefixedStringMap("client.latency", cHistogramTags)
key = tally.KeyForPrefixedStringMap("client.latency-hist", cHistogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)
}
24 changes: 18 additions & 6 deletions test/endpoints/tchannel/baz/baz_metrics_test.go
@@ -118,7 +118,7 @@ func TestCallMetrics(t *testing.T) {
delete(metrics, key)
}
}
assert.Equal(t, numMetrics, len(metrics))
assert.Equal(t, numMetrics, len(metrics)-2) // number to go away after we remove the gratuitous histograms for timer metrics

endpointNames := []string{
"endpoint.request",
@@ -143,14 +143,20 @@ func TestCallMetrics(t *testing.T) {
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)
}

key := tally.KeyForPrefixedStringMap("endpoint.latency", endpointTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
value := *metrics[key].MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 10*1000*1000, "expected timer to be < 10 milli seconds")

histogramTags := map[string]string{
m3.DefaultHistogramBucketName: "-infinity-10ms", // TODO(argouber): Remove the ugly hardcoding
m3.DefaultHistogramBucketName: "-infinity-10ms", // TODO: Remove the ugly hardcoding
m3.DefaultHistogramBucketIDName: "0000",
}
for k, v := range endpointTags {
histogramTags[k] = v
}
key := tally.KeyForPrefixedStringMap("endpoint.latency", histogramTags)
key = tally.KeyForPrefixedStringMap("endpoint.latency-hist", histogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)

Expand All @@ -177,9 +183,9 @@ func TestCallMetrics(t *testing.T) {
"outbound.calls.per-attempt.latency",
tchannelOutboundTags,
)]
value := *outboundLatency.MetricValue.Timer.I64Value
value = *outboundLatency.MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 1000*1000*1000, "expected timer to be <1 second")
assert.True(t, value < 10*1000*1000, "expected timer to be 10 milli second")

clientNames := []string{
"client.request",
@@ -208,14 +214,20 @@ func TestCallMetrics(t *testing.T) {
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value, "expected counter to be 1")
}

key = tally.KeyForPrefixedStringMap("client.latency", clientTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
value = *metrics[key].MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 10*1000*1000, "expected timer to be < 10 milli seconds")

cHistogramTags := map[string]string{
m3.DefaultHistogramBucketName: "-infinity-10ms",
m3.DefaultHistogramBucketIDName: "0000",
}
for k, v := range clientTags {
cHistogramTags[k] = v
}
key = tally.KeyForPrefixedStringMap("client.latency", cHistogramTags)
key = tally.KeyForPrefixedStringMap("client.latency-hist", cHistogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)
}
18 changes: 12 additions & 6 deletions test/health_test.go
@@ -158,18 +158,23 @@ func TestHealthMetrics(t *testing.T) {

key := tally.KeyForPrefixedStringMap("endpoint.request", tags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
key = tally.KeyForPrefixedStringMap("endpoint.latency", histogramTags)
key = tally.KeyForPrefixedStringMap("endpoint.latency", statusTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
key = tally.KeyForPrefixedStringMap("endpoint.latency-hist", histogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)

statusKey := tally.KeyForPrefixedStringMap(
"endpoint.status", statusTags,
)
assert.Contains(t, metrics, statusKey, "expected metrics: %s", statusKey)

latencyMetric := metrics[tally.KeyForPrefixedStringMap(
"endpoint.latency", histogramTags,
)]
value := *latencyMetric.MetricValue.Count.I64Value
latencyMetric := metrics[tally.KeyForPrefixedStringMap("endpoint.latency", statusTags)]
value := *latencyMetric.MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 10*1000*1000, "expected timer to be <10 milli seconds")

latencyHistMetric := metrics[tally.KeyForPrefixedStringMap("endpoint.latency-hist", histogramTags)]
value = *latencyHistMetric.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value)

recvdMetric := metrics[tally.KeyForPrefixedStringMap(
@@ -211,8 +216,9 @@ func TestRuntimeMetrics(t *testing.T) {
"runtime.memory.stack",

"runtime.memory.num-gc",
"runtime.memory.gc-pause-ms",
}
histogramName := "runtime.memory.gc-pause-ms"
histogramName := "runtime.memory.gc-pause-ms-hist"

// this is a shame because first GC takes 20s to kick in
// only then gc stats can be collected
