From eafc71ad497b27bdc92aa9620e283db2a3ede564 Mon Sep 17 00:00:00 2001 From: Mustafa Abdelrahman Date: Thu, 25 Apr 2024 18:50:29 +0200 Subject: [PATCH 1/2] add metrics for number of unhealthy endpoints Signed-off-by: Mustafa Abdelrahman --- docs/operation/operation.md | 3 ++- proxy/healthy_endpoints.go | 2 +- proxy/healthy_endpoints_test.go | 5 ++++- proxy/proxy.go | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/operation/operation.md b/docs/operation/operation.md index 8969916091..8395d00ac4 100644 --- a/docs/operation/operation.md +++ b/docs/operation/operation.md @@ -925,7 +925,8 @@ while choosing the endpoint for the given request A set of metrics will be exposed to track passive health check: -* `passive-health-check.endpoints.dropped`: Number of all endpoints dropped before load balancing a request, so after N requests and M endpoints are being dropped this counter would be N*M. +* Counter `passive-health-check.requests.dropped`: Number of all endpoints dropped before load balancing a request, so after N requests and M endpoints are being dropped this counter would be N*M. 
+* Gauge `passive-health-check.endpoints.dropped`: Number of unhealthy/filtered endpoints ## Memory consumption diff --git a/proxy/healthy_endpoints.go b/proxy/healthy_endpoints.go index 8e172993bd..163600fc00 100644 --- a/proxy/healthy_endpoints.go +++ b/proxy/healthy_endpoints.go @@ -25,7 +25,7 @@ func (h *healthyEndpoints) filterHealthyEndpoints(ctx *context, endpoints []rout if p < dropProbability { ctx.Logger().Infof("Dropping endpoint %q due to passive health check: p=%0.2f, dropProbability=%0.2f", e.Host, p, dropProbability) - metrics.IncCounter("passive-health-check.endpoints.dropped") + metrics.IncCounter("passive-health-check.requests.dropped") } else { filtered = append(filtered, e) } diff --git a/proxy/healthy_endpoints_test.go b/proxy/healthy_endpoints_test.go index 0ce0682879..ce1ec36577 100644 --- a/proxy/healthy_endpoints_test.go +++ b/proxy/healthy_endpoints_test.go @@ -175,8 +175,11 @@ func TestPHCForMultipleHealthyAndOneUnhealthyEndpoints(t *testing.T) { failedReqs := sendGetRequests(t, ps) assert.InDelta(t, 0, failedReqs, 0.1*float64(nRequests)) mockMetrics.WithCounters(func(counters map[string]int64) { - assert.InDelta(t, float64(nRequests), float64(counters["passive-health-check.endpoints.dropped"]), 0.3*float64(nRequests)) // allow 30% error + assert.InDelta(t, float64(nRequests), float64(counters["passive-health-check.requests.dropped"]), 0.3*float64(nRequests)) // allow 30% error }) + v, ok := mockMetrics.Gauge("passive-health-check.endpoints.dropped") + assert.True(t, ok, "passive-health-check.endpoints.dropped gauge not found") + assert.Equal(t, 1.0, v) }) } diff --git a/proxy/proxy.go b/proxy/proxy.go index 442ea71324..ea29ceca70 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -539,8 +539,10 @@ func setRequestURLForDynamicBackend(u *url.URL, stateBag map[string]interface{}) func (p *Proxy) selectEndpoint(ctx *context) *routing.LBEndpoint { rt := ctx.route endpoints := rt.LBEndpoints + beforefiltering := len(endpoints) endpoints = 
p.fadein.filterFadeIn(endpoints, rt) endpoints = p.heathlyEndpoints.filterHealthyEndpoints(ctx, endpoints, p.metrics) + p.metrics.UpdateGauge("passive-health-check.endpoints.dropped", float64(beforefiltering-len(endpoints))) lbctx := &routing.LBContext{ Request: ctx.request, From c11f4cd6dfbdc88a592fa4ad5e63f1c796f1e13e Mon Sep 17 00:00:00 2001 From: Mustafa Abdelrahman Date: Fri, 26 Apr 2024 15:46:52 +0200 Subject: [PATCH 2/2] Rename metric for better description Signed-off-by: Mustafa Abdelrahman --- docs/operation/operation.md | 2 +- proxy/healthy_endpoints.go | 2 +- proxy/healthy_endpoints_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/operation/operation.md b/docs/operation/operation.md index 8395d00ac4..1d40ee86b7 100644 --- a/docs/operation/operation.md +++ b/docs/operation/operation.md @@ -925,7 +925,7 @@ while choosing the endpoint for the given request A set of metrics will be exposed to track passive health check: -* Counter `passive-health-check.requests.dropped`: Number of all endpoints dropped before load balancing a request, so after N requests and M endpoints are being dropped this counter would be N*M. +* Counter `passive-health-check.requests.failures.mitigated`: Number of all possible request failures mitigated by passive health check, so after N requests and M endpoints this counter could be N*M in worst case scenario (all endpoints aren't healthy). 
* Gauge `passive-health-check.endpoints.dropped`: Number of unhealthy/filtered endpoints ## Memory consumption diff --git a/proxy/healthy_endpoints.go b/proxy/healthy_endpoints.go index 163600fc00..511d44e12b 100644 --- a/proxy/healthy_endpoints.go +++ b/proxy/healthy_endpoints.go @@ -25,7 +25,7 @@ func (h *healthyEndpoints) filterHealthyEndpoints(ctx *context, endpoints []rout if p < dropProbability { ctx.Logger().Infof("Dropping endpoint %q due to passive health check: p=%0.2f, dropProbability=%0.2f", e.Host, p, dropProbability) - metrics.IncCounter("passive-health-check.requests.dropped") + metrics.IncCounter("passive-health-check.requests.failures.mitigated") } else { filtered = append(filtered, e) } diff --git a/proxy/healthy_endpoints_test.go b/proxy/healthy_endpoints_test.go index ce1ec36577..5fccf001e4 100644 --- a/proxy/healthy_endpoints_test.go +++ b/proxy/healthy_endpoints_test.go @@ -175,7 +175,7 @@ func TestPHCForMultipleHealthyAndOneUnhealthyEndpoints(t *testing.T) { failedReqs := sendGetRequests(t, ps) assert.InDelta(t, 0, failedReqs, 0.1*float64(nRequests)) mockMetrics.WithCounters(func(counters map[string]int64) { - assert.InDelta(t, float64(nRequests), float64(counters["passive-health-check.requests.dropped"]), 0.3*float64(nRequests)) // allow 30% error + assert.InDelta(t, float64(nRequests), float64(counters["passive-health-check.requests.failures.mitigated"]), 0.3*float64(nRequests)) // allow 30% error }) v, ok := mockMetrics.Gauge("passive-health-check.endpoints.dropped") assert.True(t, ok, "passive-health-check.endpoints.dropped gauge not found")