Skip to content

Commit

Permalink
Fix/lifo self healing (#1054)
Browse files Browse the repository at this point in the history
* add better logs for lifo filters

Signed-off-by: Sandor Szücs <sandor.szuecs@zalando.de>

* fix: if lifo filter response is not called, the proxy should execute done() to release the queue

Signed-off-by: Sandor Szücs <sandor.szuecs@zalando.de>

* fix successful retry should not be return an error

Signed-off-by: Sandor Szücs <sandor.szuecs@zalando.de>
  • Loading branch information
szuecs committed May 14, 2019
1 parent 4b3d745 commit 0819a0b
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 10 deletions.
17 changes: 7 additions & 10 deletions filters/scheduler/lifo.go
Expand Up @@ -30,9 +30,6 @@ const (
LIFOName = "lifo"
LIFOGroupName = "lifoGroup"

lifoKey = "lifo-done"
lifoGroupKey = "lifo-group-done"

defaultMaxConcurreny = 100
defaultMaxStackSize = 100
defaultTimeout = 10 * time.Second
Expand Down Expand Up @@ -242,13 +239,13 @@ func (l *lifoFilter) SetKey(k string) {
// - 503 if jobqueue.ErrStackFull
// - 502 if jobqueue.ErrTimeout
func (l *lifoFilter) Request(ctx filters.FilterContext) {
request(l.GetStack(), lifoKey, ctx)
request(l.GetStack(), scheduler.LIFOKey, ctx)
}

// Response is the filter.Filter interface implementation. Response
// will decrease the number of inflight requests.
func (l *lifoFilter) Response(ctx filters.FilterContext) {
response(lifoKey, ctx)
response(scheduler.LIFOKey, ctx)
}

// Config returns the scheduler configuration for the given filter
Expand Down Expand Up @@ -281,13 +278,13 @@ func (*lifoGroupFilter) SetKey(string) {}
// - 503 if jobqueue.ErrStackFull
// - 502 if jobqueue.ErrTimeout
func (l *lifoGroupFilter) Request(ctx filters.FilterContext) {
request(l.GetStack(), lifoGroupKey, ctx)
request(l.GetStack(), scheduler.LIFOKey, ctx)
}

// Response is the filter.Filter interface implementation. Response
// will decrease the number of inflight requests.
func (l *lifoGroupFilter) Response(ctx filters.FilterContext) {
response(lifoGroupKey, ctx)
response(scheduler.LIFOKey, ctx)
}

func request(s *scheduler.Stack, key string, ctx filters.FilterContext) {
Expand All @@ -301,13 +298,13 @@ func request(s *scheduler.Stack, key string, ctx filters.FilterContext) {
// TODO: replace the log with metrics
switch err {
case jobqueue.ErrStackFull:
log.Errorf("Failed to get an entry on to the stack to process: %v", err)
log.Errorf("Failed to get an entry on to the stack to process StackFull: %v for host %s", err, ctx.Request().Host)
ctx.Serve(&http.Response{StatusCode: http.StatusServiceUnavailable, Status: "Stack Full - https://opensource.zalando.com/skipper/operation/operation/#scheduler"})
case jobqueue.ErrTimeout:
log.Errorf("Failed to get an entry on to the stack to process: %v", err)
log.Errorf("Failed to get an entry on to the stack to process Timeout: %v for host %s", err, ctx.Request().Host)
ctx.Serve(&http.Response{StatusCode: http.StatusBadGateway, Status: "Stack timeout - https://opensource.zalando.com/skipper/operation/operation/#scheduler"})
default:
log.Errorf("Unknown error for route based LIFO: %v", err)
log.Errorf("Unknown error for route based LIFO: %v for host %s", err, ctx.Request().Host)
ctx.Serve(&http.Response{StatusCode: http.StatusInternalServerError})
}
return
Expand Down
7 changes: 7 additions & 0 deletions proxy/proxy.go
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/zalando/skipper/metrics"
"github.com/zalando/skipper/ratelimit"
"github.com/zalando/skipper/routing"
"github.com/zalando/skipper/scheduler"
"github.com/zalando/skipper/tracing"
)

Expand Down Expand Up @@ -1024,9 +1025,15 @@ func (p *Proxy) do(ctx *context) error {
if perr2.code >= http.StatusInternalServerError {
p.metrics.MeasureBackend5xx(backendStart)
}
if done := ctx.StateBag()[scheduler.LIFOKey]; done != nil {
done.(func())()
}
return perr2
}
} else {
if done := ctx.StateBag()[scheduler.LIFOKey]; done != nil {
done.(func())()
}
return perr
}
}
Expand Down
4 changes: 4 additions & 0 deletions scheduler/scheduler.go
Expand Up @@ -10,6 +10,10 @@ import (

// note: Config must stay comparable because it is used to detect changes in route specific LIFO config

const (
LIFOKey = "lifo"
)

type Config struct {
Name string
MaxConcurrency int
Expand Down

0 comments on commit 0819a0b

Please sign in to comment.