
Commit

Merge d189dd3 into 08f7967
argouber committed Sep 26, 2019
2 parents 08f7967 + d189dd3 commit 871b6b7
Showing 15 changed files with 154 additions and 179 deletions.
1 change: 0 additions & 1 deletion coverage.tmp

This file was deleted.

2 changes: 1 addition & 1 deletion runtime/client_http_response.go
@@ -172,7 +172,7 @@ func (res *ClientHTTPResponse) finish() {
logFn := res.req.ContextLogger.Info

// emit metrics
res.req.metrics.RecordTimer(res.req.ctx, clientLatency, res.finishTime.Sub(res.req.startTime))
res.req.metrics.RecordHistogramDuration(res.req.ctx, clientLatency, res.finishTime.Sub(res.req.startTime))
_, known := knownStatusCodes[res.StatusCode]
if !known {
res.req.Logger.Error(
18 changes: 12 additions & 6 deletions runtime/context.go
@@ -303,7 +303,9 @@ type Logger interface {
// ContextMetrics emit metrics with tags extracted from context.
type ContextMetrics interface {
IncCounter(ctx context.Context, name string, value int64)
RecordTimer(ctx context.Context, name string, d time.Duration)
// @deprecated -- use Histogram instead
// RecordTimer(ctx context.Context, name string, d time.Duration)
RecordHistogramDuration(ctx context.Context, name string, d time.Duration)
}

type contextMetrics struct {
@@ -319,12 +321,16 @@ func NewContextMetrics(scope tally.Scope) ContextMetrics {

// IncCounter increments the counter with current tags from context
func (c *contextMetrics) IncCounter(ctx context.Context, name string, value int64) {
tags := GetScopeTagsFromCtx(ctx)
c.scope.Tagged(tags).Counter(name).Inc(value)
c.scope.Tagged(GetScopeTagsFromCtx(ctx)).Counter(name).Inc(value)
}

// @deprecated
// RecordTimer records the duration with current tags from context
func (c *contextMetrics) RecordTimer(ctx context.Context, name string, d time.Duration) {
tags := GetScopeTagsFromCtx(ctx)
c.scope.Tagged(tags).Timer(name).Record(d)
//func (c *contextMetrics) RecordTimer(ctx context.Context, name string, d time.Duration) {
// c.scope.Tagged(GetScopeTagsFromCtx(ctx)).Timer(name).Record(d)
//}

// RecordHistogram records the duration with current tags from context
func (c *contextMetrics) RecordHistogramDuration(ctx context.Context, name string, d time.Duration) {
c.scope.Tagged(GetScopeTagsFromCtx(ctx)).Histogram(name, tally.DefaultBuckets).RecordDuration(d)
}
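
For orientation, here is a minimal sketch (not code from this repo; names are illustrative) of what the call-site migration looks like. It assumes tally's behavior that a histogram created with tally.DefaultBuckets falls back to the buckets configured on the scope, which the gateway.go change below supplies.

package main

import (
    "fmt"
    "time"

    "github.com/uber-go/tally"
)

func main() {
    // tally's in-memory test scope makes it easy to see what gets recorded.
    scope := tally.NewTestScope("", nil)

    start := time.Now()
    time.Sleep(5 * time.Millisecond)
    elapsed := time.Since(start)

    // Before this commit: a Timer sample carrying one raw duration.
    // scope.Timer("endpoint.latency").Record(elapsed)

    // After this commit: a Histogram observation. tally.DefaultBuckets is the
    // package-level default, so the buckets configured on the scope (or
    // tally's own fallback) determine the bucket boundaries.
    scope.Histogram("endpoint.latency", tally.DefaultBuckets).RecordDuration(elapsed)

    // Each observation lands in exactly one bucket.
    for _, h := range scope.Snapshot().Histograms() {
        fmt.Println(h.Name(), h.Durations())
    }
}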
1 change: 1 addition & 0 deletions runtime/gateway.go
@@ -549,6 +549,7 @@ func (gateway *Gateway) setupMetrics(config *StaticConfig) (err error) {
tally.ScopeOptions{
Tags: defaultTags,
CachedReporter: gateway.metricsBackend,
DefaultBuckets: tally.MustMakeExponentialDurationBuckets(10*time.Millisecond, 4, 6), // Range: 10ms - 10s
Separator: tally.DefaultSeparator,
SanitizeOptions: &m3.DefaultSanitizerOpts,
},
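
As a worked example of the new scope option (a sketch, assuming MustMakeExponentialDurationBuckets(start, factor, count) multiplies the previous bound by factor for each of count buckets), the boundaries behind the "Range: 10ms - 10s" comment come out to 10ms, 40ms, 160ms, 640ms, 2.56s and 10.24s:

package main

import (
    "fmt"
    "time"

    "github.com/uber-go/tally"
)

func main() {
    // Same call as in gateway.go: 6 exponential duration buckets, factor 4,
    // starting at 10ms, giving 10ms, 40ms, 160ms, 640ms, 2.56s, 10.24s.
    buckets := tally.MustMakeExponentialDurationBuckets(10*time.Millisecond, 4, 6)
    fmt.Println(buckets.AsDurations())
}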
2 changes: 1 addition & 1 deletion runtime/grpc_client.go
@@ -124,7 +124,7 @@ func (c *callHelper) Start() {
// This method emits latency and error metric as well as logging in case of error.
func (c *callHelper) Finish(ctx context.Context, err error) context.Context {
c.finishTime = time.Now()
c.metrics.RecordTimer(ctx, clientLatency, c.finishTime.Sub(c.startTime))
c.metrics.RecordHistogramDuration(ctx, clientLatency, c.finishTime.Sub(c.startTime))
fields := []zapcore.Field{
zap.Time(logFieldRequestStartTime, c.startTime),
zap.Time(logFieldRequestFinishedTime, c.finishTime),
17 changes: 10 additions & 7 deletions runtime/plugins/m3_aggregator.go
@@ -89,13 +89,16 @@ func (g *M3Collector) incrementCounterMetric(prefix string, i float64) {
if i == 0 {
return
}
c := g.scope.Counter(prefix)
c.Inc(int64(i))
g.scope.Counter(prefix).Inc(int64(i))
}

func (g *M3Collector) updateTimerMetric(prefix string, dur time.Duration) {
c := g.scope.Timer(prefix)
c.Record(dur)
// @deprecated in favor of Histogram. Reopen ONLY if there is a need to
//func (g *M3Collector) updateTimerMetric(prefix string, dur time.Duration) {
// g.scope.Timer(prefix).Record(dur)
//}

func (g *M3Collector) updateHistogramMetric(prefix string, dur time.Duration) {
g.scope.Histogram(prefix, tally.DefaultBuckets).RecordDuration(dur)
}

// Update is a callback by hystrix lib to relay the metrics to m3
@@ -109,8 +112,8 @@ func (g *M3Collector) Update(r metricCollector.MetricResult) {
g.incrementCounterMetric(g.timeoutsPrefix, r.Timeouts)
g.incrementCounterMetric(g.fallbackSuccessesPrefix, r.FallbackSuccesses)
g.incrementCounterMetric(g.fallbackFailuresPrefix, r.FallbackFailures)
g.updateTimerMetric(g.totalDurationPrefix, r.TotalDuration)
g.updateTimerMetric(g.runDurationPrefix, r.RunDuration)
g.updateHistogramMetric(g.totalDurationPrefix, r.TotalDuration)
g.updateHistogramMetric(g.runDurationPrefix, r.RunDuration)
}

// Reset is a noop operation in this collector.
6 changes: 3 additions & 3 deletions runtime/runtime_metrics.go
@@ -65,7 +65,7 @@ type runtimeMetrics struct {
// number of completed GC cycles
numGC tally.Counter
// GC pause time
gcPauseMs tally.Timer
gcPauseMs tally.Histogram
}

// runtimeCollector keeps the current state of runtime metrics
@@ -123,7 +123,7 @@ func NewRuntimeMetricsCollector(

// GC
numGC: scope.Counter("memory.num-gc"),
gcPauseMs: scope.Timer("memory.gc-pause-ms"),
gcPauseMs: scope.Histogram("memory.gc-pause-ms", tally.DefaultBuckets),
},
running: false,
stop: make(chan struct{}),
@@ -217,7 +217,7 @@ func (r *runtimeCollector) collectGCMetrics(memStats *runtime.MemStats) {

for i := lastNum; i != num; i++ {
pause := memStats.PauseNs[i%uint32(len(memStats.PauseNs))]
r.metrics.gcPauseMs.Record(time.Duration(pause))
r.metrics.gcPauseMs.RecordDuration(time.Duration(pause))
}
}

2 changes: 1 addition & 1 deletion runtime/server_http_response.go
@@ -94,7 +94,7 @@ func (res *ServerHTTPResponse) finish(ctx context.Context) {
// no need to put this tag on the context because this is the end of response life cycle
statusTag := map[string]string{scopeTagStatus: fmt.Sprintf("%d", res.StatusCode)}
tagged := res.scope.Tagged(statusTag)
tagged.Timer(endpointLatency).Record(res.finishTime.Sub(res.Request.startTime))
tagged.Histogram(endpointLatency, tally.DefaultBuckets).RecordDuration(res.finishTime.Sub(res.Request.startTime))
if !known {
res.logger.Error(
"Unknown status code",
2 changes: 1 addition & 1 deletion runtime/tchannel_inbound_call.go
@@ -66,7 +66,7 @@ func (c *tchannelInboundCall) finish(ctx context.Context, err error) {
} else {
c.scope.Counter(endpointSuccess).Inc(1)
}
c.scope.Timer(endpointLatency).Record(c.finishTime.Sub(c.startTime))
c.scope.Histogram(endpointLatency, tally.DefaultBuckets).RecordDuration(c.finishTime.Sub(c.startTime))
c.scope.Counter(endpointRequest).Inc(1)

fields := c.logFields(ctx)
2 changes: 1 addition & 1 deletion runtime/tchannel_outbound_call.go
@@ -64,7 +64,7 @@ func (c *tchannelOutboundCall) finish(ctx context.Context, err error) {
} else {
c.metrics.IncCounter(ctx, clientSuccess, 1)
}
c.metrics.RecordTimer(ctx, clientLatency, c.finishTime.Sub(c.startTime))
c.metrics.RecordHistogramDuration(ctx, clientLatency, c.finishTime.Sub(c.startTime))

// write logs
fields := c.logFields(ctx)
2 changes: 1 addition & 1 deletion scripts/cover.sh
@@ -34,7 +34,7 @@ echo "Starting coverage tests:"

for file in "${FILES_ARR[@]}"; do

if grep -q -v $file <<<$REAL_TEST_FILES; then
if ! grep -q $file <<<$REAL_TEST_FILES; then
continue
fi
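# NOTE (reviewer sketch, not part of cover.sh): `grep -q -v $file` exits 0 whenever
# ANY line of $REAL_TEST_FILES fails to match $file, i.e. almost always, so the old
# guard skipped files that do have tests; `! grep -q $file` skips only files that
# are genuinely absent from the list.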

89 changes: 35 additions & 54 deletions test/endpoints/bar/bar_metrics_test.go
@@ -28,6 +28,7 @@ import (

"github.com/stretchr/testify/assert"
"github.com/uber-go/tally"
"github.com/uber-go/tally/m3"
testGateway "github.com/uber/zanzibar/test/lib/test_gateway"
"github.com/uber/zanzibar/test/lib/util"
)
@@ -108,33 +109,32 @@ func TestCallMetrics(t *testing.T) {
"protocol": "HTTP",
}
statusTags := map[string]string{
"env": "test",
"service": "test-gateway",
"status": "200",
"endpointid": "bar",
"handlerid": "normal",
"regionname": "san_francisco",
"device": "ios",
"deviceversion": "carbon",
"dc": "unknown",
"protocol": "HTTP",
"status": "200",
}
for k, v := range endpointTags {
statusTags[k] = v
}
histogramTags := map[string]string{
m3.DefaultHistogramBucketName: "-infinity-10ms", // TODO(argouber): Remove the ugly hardcoding
m3.DefaultHistogramBucketIDName: "0000",
}
for k, v := range statusTags {
histogramTags[k] = v
}
key := tally.KeyForPrefixedStringMap("endpoint.request", endpointTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
key = tally.KeyForPrefixedStringMap("endpoint.latency", statusTags)
key = tally.KeyForPrefixedStringMap("endpoint.latency", histogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)

inboundLatency := metrics[tally.KeyForPrefixedStringMap(
"endpoint.latency", statusTags,
"endpoint.latency", histogramTags,
)]
value := *inboundLatency.MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 1000*1000*1000, "expected timer to be <1 second")
assert.Equal(t, int64(1), *inboundLatency.MetricValue.Count.I64Value)

inboundRecvd := metrics[tally.KeyForPrefixedStringMap(
"endpoint.request", endpointTags,
)]
value = *inboundRecvd.MetricValue.Count.I64Value
value := *inboundRecvd.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value)

inboundStatus := metrics[tally.KeyForPrefixedStringMap(
@@ -143,10 +143,6 @@ func TestCallMetrics(t *testing.T) {
value = *inboundStatus.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value, "expected counter to be 1")

httpClientNames := []string{
"client.latency",
"client.request",
}
httpClientTags := map[string]string{
"env": "test",
"service": "test-gateway",
@@ -162,45 +158,30 @@ func TestCallMetrics(t *testing.T) {
"protocol": "HTTP",
}
cStatusTags := map[string]string{
"env": "test",
"service": "test-gateway",
"clientid": "bar",
"clientmethod": "Normal",
"targetendpoint": "Bar--normal",
"status": "200",
"dc": "unknown",
"endpointid": "bar",
"handlerid": "normal",
"regionname": "san_francisco",
"device": "ios",
"deviceversion": "carbon",
"protocol": "HTTP",
"status": "200",
}

for _, name := range httpClientNames {
key := tally.KeyForPrefixedStringMap(name, httpClientTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
for k, v := range httpClientTags {
cStatusTags[k] = v
}
cHistogramTags := map[string]string{
m3.DefaultHistogramBucketName: "-infinity-10ms", // TODO(argouber): Remove the ugly hardcoding
m3.DefaultHistogramBucketIDName: "0000",
}
for k, v := range httpClientTags {
cHistogramTags[k] = v
}

outboundLatency := metrics[tally.KeyForPrefixedStringMap(
"client.latency", httpClientTags,
)]
value = *outboundLatency.MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 1000*1000*1000, "expected timer to be <1 second")
key = tally.KeyForPrefixedStringMap("client.request", httpClientTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value, "expected counter to be 1")

outboundSent := metrics[tally.KeyForPrefixedStringMap(
"client.request", httpClientTags,
)]
value = *outboundSent.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value, "expected counter to be 1")
key = tally.KeyForPrefixedStringMap("client.status", cStatusTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value, "expected counter to be 1")

statusSuccess := metrics[tally.KeyForPrefixedStringMap(
"client.status",
cStatusTags,
)]
value = *statusSuccess.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value, "expected counter to be 1")
key = tally.KeyForPrefixedStringMap("client.latency", cHistogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)

allLogs := gateway.AllLogs()

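
The latency assertions in this test (and in the baz test below) change shape for one reason: once latency is a histogram, the M3 reporter, as the updated assertions suggest, reports each bucket as a count tagged with the bucket's range and zero-padded index, so a single fast request shows up as a count of 1 on the first bucket rather than as a raw Timer value. A small sketch of how such a bucket-tagged key is assembled, using only identifiers that already appear in the test (the bucket range string is the hardcoded value the test expects):

package main

import (
    "fmt"

    "github.com/uber-go/tally"
    "github.com/uber-go/tally/m3"
)

func main() {
    // Tags carried by the per-bucket counter for the first bucket of the
    // 10ms..10s scheme; the endpoint and status tags would be merged in,
    // exactly as the test does.
    tags := map[string]string{
        "env":                           "test",
        m3.DefaultHistogramBucketName:   "-infinity-10ms",
        m3.DefaultHistogramBucketIDName: "0000",
    }
    fmt.Println(tally.KeyForPrefixedStringMap("endpoint.latency", tags))
}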
74 changes: 31 additions & 43 deletions test/endpoints/baz/baz_metrics_test.go
@@ -28,6 +28,7 @@ import (

"github.com/stretchr/testify/assert"
"github.com/uber-go/tally"
"github.com/uber-go/tally/m3"
bazClient "github.com/uber/zanzibar/examples/example-gateway/build/clients/baz"
clientsBazBaz "github.com/uber/zanzibar/examples/example-gateway/build/gen-code/clients/baz/baz"
zanzibar "github.com/uber/zanzibar/runtime"
@@ -119,35 +120,29 @@ func TestCallMetrics(t *testing.T) {
"protocol": "HTTP",
}
statusTags := map[string]string{
"env": "test",
"service": "test-gateway",
"status": "204",
"endpointid": "baz",
"handlerid": "call",
"regionname": "san_francisco",
"device": "ios",
"deviceversion": "carbon",
"dc": "unknown",
"protocol": "HTTP",
"status": "204",
}
for k, v := range endpointTags {
statusTags[k] = v
}
histogramTags := map[string]string{
m3.DefaultHistogramBucketName: "-infinity-10ms", // TODO(argouber): Remove the ugly hardcoding
m3.DefaultHistogramBucketIDName: "0000",
}
for k, v := range statusTags {
histogramTags[k] = v
}
key := tally.KeyForPrefixedStringMap("endpoint.request", endpointTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
key = tally.KeyForPrefixedStringMap("endpoint.latency", statusTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)

latencyMetric := metrics[tally.KeyForPrefixedStringMap(
"endpoint.latency", statusTags,
)]
value := *latencyMetric.MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 1000*1000*1000, "expected timer to be <1 second")

recvdMetric := metrics[tally.KeyForPrefixedStringMap(
"endpoint.request", endpointTags,
)]
value = *recvdMetric.MetricValue.Count.I64Value
recvdMetric := metrics[key]
value := *recvdMetric.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value, "expected counter to be 1")

key = tally.KeyForPrefixedStringMap("endpoint.latency", histogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
latencyMetric := metrics[key]
assert.Equal(t, int64(1), *latencyMetric.MetricValue.Count.I64Value)

statusMetric := metrics[tally.KeyForPrefixedStringMap(
"endpoint.status", statusTags,
)]
@@ -180,7 +175,6 @@ func TestCallMetrics(t *testing.T) {
assert.True(t, value < 1000*1000*1000, "expected timer to be <1 second")

clientNames := []string{
"client.latency",
"client.request",
"client.success",
}
@@ -202,24 +196,18 @@
for _, name := range clientNames {
key := tally.KeyForPrefixedStringMap(name, clientTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
value := metrics[key]
assert.Equal(t, int64(1), *value.MetricValue.Count.I64Value, "expected counter to be 1")
}

outboundLatency := metrics[tally.KeyForPrefixedStringMap(
"client.latency", clientTags,
)]
value = *outboundLatency.MetricValue.Timer.I64Value
assert.True(t, value > 1000, "expected timer to be >1000 nano seconds")
assert.True(t, value < 1000*1000*1000, "expected timer to be <1 second")

outboundSent := metrics[tally.KeyForPrefixedStringMap(
"client.request", clientTags,
)]
value = *outboundSent.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value, "expected counter to be 1")

outboundSuccess := metrics[tally.KeyForPrefixedStringMap(
"client.success", clientTags,
)]
value = *outboundSuccess.MetricValue.Count.I64Value
assert.Equal(t, int64(1), value, "expected counter to be 1")
cHistogramTags := map[string]string{
m3.DefaultHistogramBucketName: "-infinity-10ms",
m3.DefaultHistogramBucketIDName: "0000",
}
for k, v := range clientTags {
cHistogramTags[k] = v
}
key = tally.KeyForPrefixedStringMap("client.latency", cHistogramTags)
assert.Contains(t, metrics, key, "expected metric: %s", key)
assert.Equal(t, int64(1), *metrics[key].MetricValue.Count.I64Value)
}
