diff --git a/filters/filters.go b/filters/filters.go index e53fe8673a..7653bd430f 100644 --- a/filters/filters.go +++ b/filters/filters.go @@ -3,6 +3,7 @@ package filters import ( // import filter packages here: + "github.com/zalando/skipper/filters/flowid" "github.com/zalando/skipper/filters/healthcheck" "github.com/zalando/skipper/filters/humanstxt" "github.com/zalando/skipper/filters/pathrewrite" @@ -28,6 +29,7 @@ func Register(registry skipper.FilterRegistry) { static.Make(), stripquery.Make(), &redirect.Redirect{}, + flowid.New(), ) } diff --git a/filters/flowid/doc.go b/filters/flowid/doc.go new file mode 100644 index 0000000000..14f0a659c0 --- /dev/null +++ b/filters/flowid/doc.go @@ -0,0 +1,100 @@ +/* +Flow Ids let you correlate router logs for a given request against the upstream application logs for that same request. +If your upstream application makes other requests to other services it can provide the same Flow Id value so that all +of those logs can be correlated. + +How it works + +Skipper generates a unique Flow Id for every HTTP request that it receives. The Flow Id is then passed to your +upstream application as an HTTP header called X-Flow-Id. + +The filter takes 2 optional parameters: + 1. Accept existing X-Flow-Id header + 2. Flow Id length + +The first parameter is a string parameter that, when set to "reuse", makes the filter keep an existing X-Flow-Id header +instead of generating a new flow id. If the existing header value is not a valid flow id, it is ignored and a new flow id is generated. +Any other string used for this parameter is ignored and has the same meaning as omitting it - existing X-Flow-Id +headers are not accepted. + +The second parameter is a number that defines the length of the generated flow ids. Valid options are any number +between 8 and 64. + +Usage + +The filter can be used with many different combinations of parameters. It can also be used without any parameter, since +both are optional. 
+ +Default parameters + + FlowId() + +Without any parameters, the filter doesn't reuse existing X-Flow-Id headers and generates new ones with 16 bytes. + +Reuse existing flow id + + FlowId("reuse") + +With only the first parameter set to the string "reuse", the filter will accept an existing X-Flow-Id header, if +it's present in the request. + +Generate bigger flow ids + + FlowId("fo shizzle", 64) + +This example doesn't accept an X-Flow-Id header and will always generate new flow ids with 64 bytes. + + +Some benchmarks + +To decide upon which hashing mechanism to use we tested some versions of UUID v1 - v4 and some other implementations. +The results are as follows: + + Benchmark_uuidv1-4 5000000 281 ns/op + Benchmark_uuidv2-4 5000000 284 ns/op + Benchmark_uuidv3-4 2000000 605 ns/op + Benchmark_uuidv4-4 1000000 1903 ns/op + BenchmarkRndAndSprintf-4 500000 3312 ns/op + BenchmarkSha1-4 1000000 2188 ns/op + BenchmarkMd5-4 1000000 2076 ns/op + BenchmarkFnv-4 500000 2223 ns/op + +The next approach was just to get len / 2 (hex.DecodedLen) bytes from crypto/rand and hex encode them. +The performance was only dependent on the length of the generated FlowId and it performed as shown in the following +benchmarks: + + BenchmarkFlowIdLen8-4 1000000 1157 ns/op + BenchmarkFlowIdLen10-4 1000000 1162 ns/op + BenchmarkFlowIdLen12-4 1000000 1163 ns/op + BenchmarkFlowIdLen14-4 1000000 1171 ns/op + BenchmarkFlowIdLen16-4 1000000 1180 ns/op + BenchmarkFlowIdLen32-4 1000000 1957 ns/op + BenchmarkFlowIdLen64-4 300000 3520 ns/op + +Starting at len = 32 (16 random bytes) the performance started to drop dramatically. + +The current implementation defines a static alphabet and builds the flowid using random indexes to get elements from +that alphabet. The initial approach was to get a random index for each element. 
The performance was: + + BenchmarkFlowIdLen8-4 5000000 375 ns/op + BenchmarkFlowIdLen10-4 3000000 446 ns/op + BenchmarkFlowIdLen12-4 3000000 508 ns/op + BenchmarkFlowIdLen14-4 3000000 579 ns/op + BenchmarkFlowIdLen16-4 2000000 641 ns/op + BenchmarkFlowIdLen32-4 1000000 1179 ns/op + BenchmarkFlowIdLen64-4 1000000 2268 ns/op + +It was possible to optimize this behavior by getting a 64 bit random value and use every 6 bits (a total +of 10 usable random indexes) to get an element from the alphabet. This strategy improved the performance to the +following results: + + BenchmarkFlowIdLen8-4 10000000 159 ns/op + BenchmarkFlowIdLen10-4 10000000 164 ns/op + BenchmarkFlowIdLen12-4 10000000 202 ns/op + BenchmarkFlowIdLen14-4 10000000 206 ns/op + BenchmarkFlowIdLen16-4 10000000 216 ns/op + BenchmarkFlowIdLen32-4 5000000 329 ns/op + BenchmarkFlowIdLen64-4 3000000 532 ns/op + +*/ +package flowid diff --git a/filters/flowid/filter.go b/filters/flowid/filter.go new file mode 100644 index 0000000000..fc1e18a59a --- /dev/null +++ b/filters/flowid/filter.go @@ -0,0 +1,71 @@ +package flowid + +import ( + "github.com/zalando/skipper/skipper" + "log" + "strings" +) + +const ( + filterName = "flowId" + flowIdHeaderName = "X-Flow-Id" + defaultLen = 16 + reuseParameterValue = "reuse" +) + +type flowIdSpec struct{} + +type flowId struct { + id string + reuseExisting bool + flowIdLength int +} + +func New() skipper.FilterSpec { + return &flowIdSpec{} +} + +func (f *flowId) Id() string { return f.id } + +func (f *flowId) Request(fc skipper.FilterContext) { + r := fc.Request() + var flowId string + + if f.reuseExisting { + flowId = r.Header.Get(flowIdHeaderName) + if isValid(flowId) { + return + } + } + + flowId, err := newFlowId(f.flowIdLength) + if err == nil { + r.Header.Set(flowIdHeaderName, flowId) + } else { + log.Println(err) + } +} + +func (f *flowId) Response(skipper.FilterContext) {} + +func (spec *flowIdSpec) MakeFilter(id string, fc skipper.FilterConfig) (skipper.Filter, error) { + 
var reuseExisting bool + if len(fc) > 0 { + if r, ok := fc[0].(string); ok { + reuseExisting = strings.ToLower(r) == reuseParameterValue + } else { + return nil, skipper.ErrInvalidFilterParameters + } + } + var flowIdLength = defaultLen + if len(fc) > 1 { + if l, ok := fc[1].(float64); ok && l >= minLength && l <= maxLength { + flowIdLength = int(l) + } else { + return nil, skipper.ErrInvalidFilterParameters + } + } + return &flowId{id, reuseExisting, flowIdLength}, nil +} + +func (spec *flowIdSpec) Name() string { return filterName } diff --git a/filters/flowid/filter_test.go b/filters/flowid/filter_test.go new file mode 100644 index 0000000000..cc65a01d61 --- /dev/null +++ b/filters/flowid/filter_test.go @@ -0,0 +1,99 @@ +package flowid + +import ( + "github.com/zalando/skipper/mock" + "github.com/zalando/skipper/skipper" + "net/http" + "testing" +) + +const ( + testFlowId = "FLOW-ID-FOR-TESTING" + invalidFlowId = "[<>] (o) [<>]" +) + +var ( + testFlowIdSpec = &flowIdSpec{} + filterConfigWithReuse = skipper.FilterConfig{reuseParameterValue} + filterConfigWithoutReuse = skipper.FilterConfig{"dummy"} +) + +func TestNewFlowIdGeneration(t *testing.T) { + f, _ := testFlowIdSpec.MakeFilter(filterName, filterConfigWithReuse) + fc := buildfilterContext() + f.Request(fc) + + flowId := fc.Request().Header.Get(flowIdHeaderName) + if !isValid(flowId) { + t.Errorf("'%s' is not a valid flow id", flowId) + } +} + +func TestFlowIdReuseExisting(t *testing.T) { + f, _ := testFlowIdSpec.MakeFilter(filterName, filterConfigWithReuse) + fc := buildfilterContext(flowIdHeaderName, testFlowId) + f.Request(fc) + + flowId := fc.Request().Header.Get(flowIdHeaderName) + if flowId != testFlowId { + t.Errorf("Got wrong flow id. 
Expected '%s' got '%s'", testFlowId, flowId) + } +} + +func TestFlowIdIgnoreReuseExisting(t *testing.T) { + f, _ := testFlowIdSpec.MakeFilter(filterName, filterConfigWithoutReuse) + fc := buildfilterContext(flowIdHeaderName, testFlowId) + f.Request(fc) + + flowId := fc.Request().Header.Get(flowIdHeaderName) + if flowId == testFlowId { + t.Errorf("Got wrong flow id. Expected a newly generated flowid but got the test flow id '%s'", flowId) + } +} + +func TestFlowIdRejectInvalidReusedFlowId(t *testing.T) { + f, _ := testFlowIdSpec.MakeFilter(filterName, filterConfigWithReuse) + fc := buildfilterContext(flowIdHeaderName, invalidFlowId) + f.Request(fc) + + flowId := fc.Request().Header.Get(flowIdHeaderName) + if flowId == invalidFlowId { + t.Errorf("Got wrong flow id. Expected a newly generated flowid but got the test flow id '%s'", flowId) + } +} + +func TestFlowIdWithSpecificLen(t *testing.T) { + fc := skipper.FilterConfig{reuseParameterValue, float64(42.0)} + f, _ := testFlowIdSpec.MakeFilter(filterName, fc) + fctx := buildfilterContext() + f.Request(fctx) + + flowId := fctx.Request().Header.Get(flowIdHeaderName) + + l := len(flowId) + if l != 42 { + t.Errorf("Wrong flowId len. 
Expected %d, got %d", 42, l) + } +} + +func TestFlowIdWithInvalidParameters(t *testing.T) { + fc := skipper.FilterConfig{true} + _, err := testFlowIdSpec.MakeFilter(filterName, fc) + if err != skipper.ErrInvalidFilterParameters { + t.Errorf("Expected an invalid parameters error, got %v", err) + } + + fc = skipper.FilterConfig{"", float64(minLength - 1)} + _, err = testFlowIdSpec.MakeFilter(filterName, fc) + if err != skipper.ErrInvalidFilterParameters { + t.Errorf("Expected an invalid parameters error, got %v", err) + } +} + +func buildfilterContext(headers ...string) skipper.FilterContext { + r, _ := http.NewRequest("GET", "http://example.org", nil) + for i := 0; i < len(headers); i += 2 { + r.Header.Set(headers[i], headers[i+1]) + } + return &mock.FilterContext{FRequest: r} +} diff --git a/filters/flowid/hashing.go b/filters/flowid/hashing.go new file mode 100644 index 0000000000..1fcb7c6bf2 --- /dev/null +++ b/filters/flowid/hashing.go @@ -0,0 +1,46 @@ +package flowid + +import ( + "errors" + "fmt" + "math/rand" + "regexp" +) + +const ( + maxLength = 64 + minLength = 8 + flowIdAlphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-+" + alphabetBitMask = 63 +) + +var ( + ErrInvalidLen = errors.New(fmt.Sprintf("Invalid length. 
Must be between %d and %d", minLength, maxLength)) + flowIdRegex = regexp.MustCompile(`^[0-9a-zA-Z+-]+$`) +) + +// newFlowId returns a random flow id of length l built from flowIdAlphabet, or ErrInvalidLen if l is outside minLength..maxLength. +// The alphabet has exactly 64 elements, so a random 6 bit value is enough to index any of them. +// Calling rand.Int63 is not very expensive, but shifting bits out of one value is faster than one call per character. +// For this reason a single call to rand.Int63 is used and its bits are split into up to 10 chunks of 6 bits each. +// The byte conversion keeps 8 bits, so it carries 2 bits of the next chunk, which are cleared with alphabetBitMask. +func newFlowId(l int) (string, error) { + if l < minLength || l > maxLength { + return "", ErrInvalidLen + } + + u := make([]byte, l) + for i := 0; i < l; i += 10 { + b := rand.Int63() + for e := 0; e < 10 && i+e < l; e++ { + c := byte(b>>uint(6*e)) & alphabetBitMask // keep only the low 6 bits as the alphabet index + u[i+e] = flowIdAlphabet[c] + } + } + + return string(u), nil +} + +func isValid(flowId string) bool { + return len(flowId) >= minLength && len(flowId) <= maxLength && flowIdRegex.MatchString(flowId) +} diff --git a/filters/flowid/hashing_test.go b/filters/flowid/hashing_test.go new file mode 100644 index 0000000000..6a7b691b45 --- /dev/null +++ b/filters/flowid/hashing_test.go @@ -0,0 +1,65 @@ +package flowid + +import ( + "testing" +) + +func TestFlowIdInvalidLength(t *testing.T) { + _, err := newFlowId(0) + if err == nil { + t.Errorf("Request for an invalid flow id length (0) succeeded and it shouldn't") + } + + _, err = newFlowId(100) + if err != ErrInvalidLen { + t.Errorf("Request for an invalid flow id length (100) succeeded and it shouldn't") + } +} + +func TestFlowIdLength(t *testing.T) { + for expected := minLength; expected <= maxLength; expected++ { + flowId, err := newFlowId(expected) + if err != nil { + t.Errorf("Failed to generate flowId with len %d", expected) + } + + l := len(flowId) + if l != expected { + t.Errorf("Got wrong flowId len. 
Requested %d, got %d (%s)", expected, l, flowId) + } + } +} + +func BenchmarkFlowIdLen8(b *testing.B) { + testFlowIdWithLen(b.N, 8) +} + +func BenchmarkFlowIdLen10(b *testing.B) { + testFlowIdWithLen(b.N, 10) +} + +func BenchmarkFlowIdLen12(b *testing.B) { + testFlowIdWithLen(b.N, 12) +} + +func BenchmarkFlowIdLen14(b *testing.B) { + testFlowIdWithLen(b.N, 14) +} + +func BenchmarkFlowIdLen16(b *testing.B) { + testFlowIdWithLen(b.N, 16) +} + +func BenchmarkFlowIdLen32(b *testing.B) { + testFlowIdWithLen(b.N, 32) +} + +func BenchmarkFlowIdLen64(b *testing.B) { + testFlowIdWithLen(b.N, 64) +} + +func testFlowIdWithLen(times int, l int) { + for i := 0; i < times; i++ { + newFlowId(l) + } +} diff --git a/proxy/proxy.go b/proxy/proxy.go index db28ffe4ed..10d6d5b096 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -272,4 +272,4 @@ func (p *proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { func addBranding(rs *http.Response) { rs.Header.Set("X-Powered-By", "Skipper") rs.Header.Set("Server", "Skipper") -} \ No newline at end of file +} diff --git a/skipper/errors.go b/skipper/errors.go new file mode 100644 index 0000000000..ccf89a8172 --- /dev/null +++ b/skipper/errors.go @@ -0,0 +1,7 @@ +package skipper + +import "errors" + +var ( + ErrInvalidFilterParameters = errors.New("Invalid filter parameters") +)