Skip to content

Commit

Permalink
Add controller_requeue_count metric
Browse files Browse the repository at this point in the history
Introducing a new metric controller_requeue_count counting the
number of re-queuing events issued per controller and reason. Current
reasons can be either "optimistic-locking" (logged as INFO)  or
"processing-error" (logged as ERROR).

This adds more visibility to potential issues ranging from things like
connection problems to the API or webhooks to possible hard errors.

For context, please see cert-manager#4956
  • Loading branch information
jayme-github committed Mar 25, 2022
1 parent d8fee10 commit d2925ee
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,10 @@ func (c *controller) worker(ctx context.Context) {
if err != nil {
if strings.Contains(err.Error(), genericregistry.OptimisticLockErrorMsg) {
	// Optimistic-locking conflicts are logged at INFO level and counted
	// under their own reason label.
	log.Info("re-queuing item due to optimistic locking on resource", "error", err.Error())
	c.metrics.IncrementRequeueCount(c.name, "optimistic-locking")
} else {
	// Any other failure is logged at ERROR level and counted as a
	// processing error.
	log.Error(err, "re-queuing item due to error processing")
	c.metrics.IncrementRequeueCount(c.name, "processing-error")
}

c.queue.AddRateLimited(obj)
Expand Down
17 changes: 17 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ type Metrics struct {
acmeClientRequestDurationSeconds *prometheus.SummaryVec
acmeClientRequestCount *prometheus.CounterVec
controllerSyncCallCount *prometheus.CounterVec
controllerRequeueCount *prometheus.CounterVec
}

// readyConditionStatuses lists the three condition status values
// (True, False, Unknown) in a fixed-size array.
var readyConditionStatuses = [...]cmmeta.ConditionStatus{
	cmmeta.ConditionTrue,
	cmmeta.ConditionFalse,
	cmmeta.ConditionUnknown,
}
Expand Down Expand Up @@ -157,6 +158,15 @@ func New(log logr.Logger, c clock.Clock) *Metrics {
},
[]string{"controller"},
)

// controllerRequeueCount counts re-queued items, partitioned by the
// "controller" and "reason" labels.
controllerRequeueCount = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: namespace,
		Name:      "controller_requeue_count",
		Help:      "The count of items that were re-queued by a controller.",
	},
	[]string{"controller", "reason"},
)
)

// Create server and register Prometheus metrics handler
Expand All @@ -172,6 +182,7 @@ func New(log logr.Logger, c clock.Clock) *Metrics {
acmeClientRequestCount: acmeClientRequestCount,
acmeClientRequestDurationSeconds: acmeClientRequestDurationSeconds,
controllerSyncCallCount: controllerSyncCallCount,
controllerRequeueCount: controllerRequeueCount,
}

return m
Expand All @@ -187,6 +198,7 @@ func (m *Metrics) NewServer(ln net.Listener) *http.Server {
m.registry.MustRegister(m.acmeClientRequestDurationSeconds)
m.registry.MustRegister(m.acmeClientRequestCount)
m.registry.MustRegister(m.controllerSyncCallCount)
m.registry.MustRegister(m.controllerRequeueCount)

mux := http.NewServeMux()
mux.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
Expand All @@ -206,3 +218,8 @@ func (m *Metrics) NewServer(ln net.Listener) *http.Server {
func (m *Metrics) IncrementSyncCallCount(controllerName string) {
	// Look up the child counter for this controller, then add one to it.
	syncCounter := m.controllerSyncCallCount.WithLabelValues(controllerName)
	syncCounter.Inc()
}

// IncrementRequeueCount adds one to the requeue counter for the given
// controller name and re-queue reason.
func (m *Metrics) IncrementRequeueCount(controllerName, reason string) {
	// Label values are supplied in (controller, reason) order.
	requeueCounter := m.controllerRequeueCount.WithLabelValues(controllerName, reason)
	requeueCounter.Inc()
}

0 comments on commit d2925ee

Please sign in to comment.