Skip to content

Commit 2b4ea92

Browse files
author
Guy Baron
authored
added metric report on saga timeout (#114)
1) Added reporting of saga timeouts to the glue component. 2) Fixed a MySQL TimeoutManager error that occurred when trying to clear a timeout.
1 parent 858d962 commit 2b4ea92

File tree

5 files changed

+39
-1
lines changed

5 files changed

+39
-1
lines changed

gbus/metrics/saga_metrics.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package metrics
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
"github.com/prometheus/client_golang/prometheus/promauto"
6+
io_prometheus_client "github.com/prometheus/client_model/go"
7+
)
8+
9+
// SagaTimeoutCounter is the package-level counter of timed-out saga instances.
// It is created once, at package initialization, by newSagaTimeoutCounter.
var SagaTimeoutCounter = newSagaTimeoutCounter()
10+
11+
func GetSagaTimeoutCounterValue() (float64, error) {
12+
m := &io_prometheus_client.Metric{}
13+
err := SagaTimeoutCounter.Write(m)
14+
15+
if err != nil {
16+
return 0, err
17+
}
18+
19+
return m.GetCounter().GetValue(), nil
20+
}
21+
22+
func newSagaTimeoutCounter() prometheus.Counter {
23+
return promauto.NewCounter(prometheus.CounterOpts{
24+
Namespace: grabbitPrefix,
25+
Subsystem: "saga",
26+
Name: "timeedout_sagas",
27+
Help: "counting the number of timedout saga instances",
28+
})
29+
}

gbus/saga/glue.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010

1111
"github.com/sirupsen/logrus"
1212
"github.com/wework/grabbit/gbus"
13+
"github.com/wework/grabbit/gbus/metrics"
1314
)
1415

1516
func fqnsFromMessages(objs []gbus.Message) []string {
@@ -247,6 +248,7 @@ func (imsm *Glue) registerEvent(exchange, topic string, event gbus.Message) erro
247248
func (imsm *Glue) TimeoutSaga(tx *sql.Tx, sagaID string) error {
248249

249250
saga, err := imsm.sagaStore.GetSagaByID(tx, sagaID)
251+
250252
//we are assuming that if the TimeoutSaga has been called but no instance returned from the store the saga
251253
//has been completed already and
252254
if err == ErrInstanceNotFound {
@@ -260,6 +262,8 @@ func (imsm *Glue) TimeoutSaga(tx *sql.Tx, sagaID string) error {
260262
imsm.Log().WithError(timeoutErr).WithField("sagaID", sagaID).Error("failed to timeout saga")
261263
return timeoutErr
262264
}
265+
266+
metrics.SagaTimeoutCounter.Inc()
263267
return imsm.completeOrUpdateSaga(tx, saga)
264268
}
265269

gbus/tx/mysql/timeout.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ func (tm *TimeoutManager) RegisterTimeout(tx *sql.Tx, sagaID string, duration ti
186186
//ClearTimeout clears a timeout for a specific saga
187187
func (tm *TimeoutManager) ClearTimeout(tx *sql.Tx, sagaID string) error {
188188

189-
deleteSQL := `delete from ` + tm.timeoutsTableName + ` where saga_id_id = ?`
189+
deleteSQL := `delete from ` + tm.timeoutsTableName + ` where saga_id = ?`
190190
_, err := tx.Exec(deleteSQL, sagaID)
191191
return err
192192
}
File renamed without changes.

tests/saga_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"time"
1010

1111
"github.com/wework/grabbit/gbus"
12+
"github.com/wework/grabbit/gbus/metrics"
1213
)
1314

1415
/*
@@ -225,6 +226,10 @@ func TestSagaTimeout(t *testing.T) {
225226
}
226227

227228
<-proceed
229+
timeoutCounter, e := metrics.GetSagaTimeoutCounterValue()
230+
if timeoutCounter != 1 || e != nil {
231+
t.Errorf("saga timeout counter expected to be 1 actual %v", timeoutCounter)
232+
}
228233
}
229234

230235
func TestSagaSelfMessaging(t *testing.T) {

0 commit comments

Comments
 (0)