
Commit 9fbfb31

Merge pull request #308 from cschleiden/fix-timer-cancel-race
Fix timer cancel race
2 parents e756c08 + 962756c

4 files changed: +71 -19 lines changed


backend/redis/events_future.go

Lines changed: 14 additions & 13 deletions

@@ -23,24 +23,25 @@ import (
 // Note: this does not work with Redis Cluster since not all keys are passed into the script.
 var futureEventsCmd = redis.NewScript(`
 	-- Find events which should become visible now
-	local events = redis.call("ZRANGE", KEYS[1], "-inf", ARGV[1], "BYSCORE")
+	local now = ARGV[1]
+	local events = redis.call("ZRANGE", KEYS[1], "-inf", now, "BYSCORE")
 	for i = 1, #events do
 		local instanceSegment = redis.call("HGET", events[i], "instance")
 
-		-- Add event to pending event stream
-		local eventData = redis.call("HGET", events[i], "event")
-		local pending_events_key = "pending-events:" .. instanceSegment
-		redis.call("XADD", pending_events_key, "*", "event", eventData)
-
-		-- Try to queue workflow task
-		local already_queued = redis.call("SADD", KEYS[3], instanceSegment)
-		if already_queued ~= 0 then
+		-- Try to queue workflow task. If a workflow task is already queued, ignore this event for now.
+		local added = redis.call("SADD", KEYS[3], instanceSegment)
+		if added == 1 then
 			redis.call("XADD", KEYS[2], "*", "id", instanceSegment, "data", "")
-		end
 
-		-- Delete event hash data
-		redis.call("DEL", events[i])
-		redis.call("ZREM", KEYS[1], events[i])
+			-- Add event to pending event stream
+			local eventData = redis.call("HGET", events[i], "event")
+			local pending_events_key = "pending-events:" .. instanceSegment
+			redis.call("XADD", pending_events_key, "*", "event", eventData)
+
+			-- Delete event hash data
+			redis.call("DEL", events[i])
+			redis.call("ZREM", KEYS[1], events[i])
+		end
 	end
 
 	return #events
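
The essential change is the reordering around SADD: an event is now copied into its instance's pending-events stream (and deleted from the future-events data) only when a new workflow task was actually queued for that instance; if a task is already queued, the event is left in place and picked up on a later run of the script. Below is a minimal, hypothetical Go sketch of that "queue at most once per instance" guard. It assumes go-redis v9, invented key names, and a trimmed-down script, so it illustrates the pattern rather than the backend's actual wiring.

// Hypothetical sketch, not the backend's code: key names, the client setup, and
// running this guard in isolation are all assumptions for illustration.
package main

import (
	"context"
	"fmt"

	"github.com/redis/go-redis/v9"
)

// queueTaskCmd isolates the guard used above: SADD returns 1 only when the
// instance was not yet in the set, so at most one workflow task is queued per
// instance and the caller can tell whether it "won".
var queueTaskCmd = redis.NewScript(`
	local added = redis.call("SADD", KEYS[2], ARGV[1])
	if added == 1 then
		redis.call("XADD", KEYS[1], "*", "id", ARGV[1], "data", "")
	end
	return added
`)

func main() {
	ctx := context.Background()
	rdb := redis.NewClient(&redis.Options{Addr: "localhost:6379"})

	keys := []string{"task-queue:workflows", "queued-instances"} // illustrative names
	for i := 0; i < 2; i++ {
		added, err := queueTaskCmd.Run(ctx, rdb, keys, "instance-1").Int()
		if err != nil {
			panic(err)
		}
		// The first attempt queues a task (added == 1); the second is a no-op.
		fmt.Printf("attempt %d queued a task: %v\n", i+1, added == 1)
	}
}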

backend/redis/scripts/complete_workflow_task.lua

Lines changed: 10 additions & 6 deletions

@@ -91,12 +91,16 @@ local timersToCancel = tonumber(getArgv())
 for i = 1, timersToCancel do
 	local futureEventKey = getKey()
 
-	redis.call("ZREM", futureEventZSetKey, futureEventKey)
-	-- remove payload
-	local eventId = redis.call("HGET", futureEventKey, "id")
-	redis.call("HDEL", payloadHashKey, eventId)
-	-- remove event hash
-	redis.call("DEL", futureEventKey)
+	local eventRemoved = redis.call("ZREM", futureEventZSetKey, futureEventKey)
+	-- Event might've become visible while this task was being processed, in that
+	-- case it would be already removed from futureEventZSetKey
+	if eventRemoved == 1 then
+		-- remove payload
+		local eventId = redis.call("HGET", futureEventKey, "id")
+		redis.call("HDEL", payloadHashKey, eventId)
+		-- remove event hash
+		redis.call("DEL", futureEventKey)
+	end
 end
 
 -- Schedule timers
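
On the cancellation side, ZREM's return value now tells the script whether the timer event was still pending. If the event already became visible while this workflow task was being processed (the situation described in the added comment), the cleanup of the payload and event hash is skipped and the already-visible event is handled by the normal event path. A hedged sketch of the same guard, expressed with plain go-redis v9 calls: key and field names are invented, and the real backend keeps these steps inside the Lua script so they stay atomic with the rest of task completion.

package main

import (
	"context"
	"fmt"

	"github.com/redis/go-redis/v9"
)

// cancelTimer mirrors the guarded cleanup with individual commands; this is an
// illustrative sketch only, with invented key names.
func cancelTimer(ctx context.Context, rdb *redis.Client, futureEventKey string) error {
	removed, err := rdb.ZRem(ctx, "future-events", futureEventKey).Result()
	if err != nil {
		return err
	}
	if removed == 0 {
		// The event already became visible; leave it to the normal event path.
		return nil
	}

	// The timer never became visible, so its payload and hash data can be dropped.
	eventID, err := rdb.HGet(ctx, futureEventKey, "id").Result()
	if err != nil {
		return err
	}
	if err := rdb.HDel(ctx, "payloads:instance-1", eventID).Err(); err != nil {
		return err
	}
	return rdb.Del(ctx, futureEventKey).Err()
}

func main() {
	ctx := context.Background()
	rdb := redis.NewClient(&redis.Options{Addr: "localhost:6379"})
	if err := cancelTimer(ctx, rdb, "future-event:instance-1:timer-1"); err != nil {
		fmt.Println("cancel failed:", err)
	}
}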

backend/test/e2e.go

Lines changed: 1 addition & 0 deletions

@@ -779,6 +779,7 @@ func EndToEndBackendTest(t *testing.T, setup func(options ...backend.BackendOpti
 	}
 
 	tests = append(tests, e2eActivityTests...)
+	tests = append(tests, e2eTimerTests...)
 	tests = append(tests, e2eStatsTests...)
 
 	run := func(suffix string, workerOptions worker.Options) {

backend/test/e2e_timer.go

Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+package test
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/cschleiden/go-workflows/client"
+	"github.com/cschleiden/go-workflows/worker"
+	"github.com/cschleiden/go-workflows/workflow"
+	"github.com/stretchr/testify/require"
+)
+
+var e2eTimerTests = []backendTest{
+	{
+		name: "Timer_ScheduleCancelRace",
+		f: func(t *testing.T, ctx context.Context, c *client.Client, w *worker.Worker, b TestBackend) {
+			wf := func(ctx workflow.Context) error {
+				// 1) Start of first execution slice
+				tctx, cancel := workflow.WithCancel(ctx)
+
+				// 1) Schedule timer
+				x := workflow.ScheduleTimer(tctx, time.Millisecond*200)
+
+				// 1) Force an end to the execution slice
+				workflow.Sleep(ctx, time.Millisecond)
+
+				// 2) Start of second execution slice
+				// 2) Cancel timer. It should not have fired at this point,
+				// we only waited for a millisecond
+				cancel()
+
+				x.Get(ctx)
+
+				// 2) Force the execution slice to be active for as long as it takes to fire the timer
+				time.Sleep(time.Millisecond * 200)
+
+				return nil
+			}
+			register(t, ctx, w, []interface{}{wf}, nil)
+
+			_, err := runWorkflowWithResult[any](t, ctx, c, wf)
+			require.NoError(t, err)
+		},
+	},
+}
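
The "1)" and "2)" comments mark which execution slice each statement runs in: the timer is scheduled and the first slice ends after a single millisecond, then the second slice cancels the timer and deliberately stays busy long enough for the 200ms deadline to pass mid-task, which is exactly the window the two script changes above guard against. For reference, a hypothetical application-level sketch of the same schedule-then-cancel pattern follows; it reuses only the workflow API calls already shown in the test, and the package, function name, and durations are invented.

package sample

import (
	"time"

	"github.com/cschleiden/go-workflows/workflow"
)

// WaitWithTimeout schedules a cancelable timer, does some other work, and then
// cancels the timer once it is no longer needed. Hypothetical example only.
func WaitWithTimeout(ctx workflow.Context) error {
	tctx, cancel := workflow.WithCancel(ctx)

	// Schedule a timer on the cancelable context.
	timer := workflow.ScheduleTimer(tctx, 30*time.Second)

	// ... other workflow logic; as in the test, this may span several
	// execution slices before the timer is dealt with.
	workflow.Sleep(ctx, time.Second)

	// Cancel the timer and wait for its future to settle, mirroring the test.
	cancel()
	timer.Get(ctx)

	return nil
}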
