Skip to content

Commit

Permalink
Migrate SQL and shadow backend from Uber internal repo (#294)
Browse files Browse the repository at this point in the history
This PR adds a new, SQL-based backend option for the tag datastore. The SQL backend offers increased performance over filesystem based backends for very large Docker registries as it takes advantage of optimizations such as indexing. Block-based storage systems are particularly bad at handling many small files, or walking directory structures. With the SQL backend, when listing the entire Docker catalog you are doing a simple SELECT over an indexed table, rather than a costly filesystem walk.

This also includes a backend abstraction option called a "shadow" backend. This backend is designed for transitioning Kraken to a new backend by allowing admins to specify an "active" and "shadow" backend. Writes are sent
to both backends, but reads only occur from the active. This ensures data consistency between the backends, and
allows the old, proven backed to act as a safety net in the case where the new backend fails or has to be taken
offline due to some unforeseen problems.
  • Loading branch information
SQUIDwarrior committed Jan 12, 2021
1 parent 1e07dd7 commit 645466c
Show file tree
Hide file tree
Showing 13 changed files with 1,521 additions and 7 deletions.
4 changes: 3 additions & 1 deletion build-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ import (
"github.com/uber/kraken/build-index/cmd"

// Import all backend client packages to register them with backend manager.
_ "github.com/uber/kraken/lib/backend/gcsbackend"
_ "github.com/uber/kraken/lib/backend/hdfsbackend"
_ "github.com/uber/kraken/lib/backend/httpbackend"
_ "github.com/uber/kraken/lib/backend/registrybackend"
_ "github.com/uber/kraken/lib/backend/s3backend"
_ "github.com/uber/kraken/lib/backend/gcsbackend"
_ "github.com/uber/kraken/lib/backend/shadowbackend"
_ "github.com/uber/kraken/lib/backend/sqlbackend"
_ "github.com/uber/kraken/lib/backend/testfs"
)

Expand Down
14 changes: 8 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,19 @@ require (
github.com/docker/libtrust v0.0.0-20160708172513-aabc10ec26b7 // indirect
github.com/garyburd/redigo v1.6.0
github.com/go-chi/chi v0.0.0-20190316151245-d08916613452 // indirect
github.com/go-errors/errors v1.1.1
github.com/gofrs/uuid v0.0.0-20190320161447-2593f3d8aa45 // indirect
github.com/golang/mock v1.3.1
github.com/golang/mock v1.4.4
github.com/golang/protobuf v1.3.3
github.com/gomodule/redigo v2.0.0+incompatible // indirect
github.com/gorilla/handlers v0.0.0-20190227193432-ac6d24f88de4 // indirect
github.com/gorilla/mux v1.7.3
github.com/jackpal/bencode-go v0.0.0-20180813173944-227668e840fa
github.com/jinzhu/gorm v1.9.16
github.com/jmoiron/sqlx v0.0.0-20190319043955-cdf62fdf55f6
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/mattn/go-sqlite3 v1.9.0
github.com/mattn/go-sqlite3 v1.14.0
github.com/opencontainers/go-digest v0.0.0-20190228220655-ac19fd6e7483
github.com/pressly/chi v4.0.2+incompatible
github.com/pressly/goose v2.6.0+incompatible
Expand All @@ -44,17 +46,17 @@ require (
github.com/satori/go.uuid v1.2.0
github.com/spaolacci/murmur3 v0.0.0-20170819071325-9f5d223c6079
github.com/spf13/cobra v0.0.4 // indirect
github.com/stretchr/testify v1.3.0
github.com/stretchr/testify v1.4.0
github.com/uber-go/tally v3.3.11+incompatible
github.com/willf/bitset v0.0.0-20190228212526-18bd95f470f9
github.com/yuin/gopher-lua v0.0.0-20191128022950-c6266f4fe8d7 // indirect
github.com/yvasiyarov/go-metrics v0.0.0-20150112132944-c25f46c4b940 // indirect
github.com/yvasiyarov/gorelic v0.0.0-20180809112600-635ca6035f23 // indirect
github.com/yvasiyarov/newrelic_platform_go v0.0.0-20160601141957-9c099fbc30e9 // indirect
go.uber.org/atomic v1.4.0
go.uber.org/multierr v1.1.0 // indirect
go.uber.org/atomic v1.5.0
go.uber.org/fx v1.13.1 // indirect
go.uber.org/zap v0.0.0-20190327195448-badef736563f
golang.org/x/net v0.0.0-20190620200207-3b0461eec859
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
golang.org/x/sync v0.0.0-20190423024810-112230192c58
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4
google.golang.org/api v0.7.0
Expand Down
65 changes: 65 additions & 0 deletions go.sum

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions lib/backend/shadowbackend/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
Kraken Shadow Datastore
========================
This backend is designed for transitioning Kraken to a new backend by allowing admins to specify an "active" and
"shadow" backend. The term shadow is used because the other backend "shadows" the active backend. Writes are sent
to both backends, but reads only occur from the active. This ensures data consistency between the backends, and
allows the old, proven backed to act as a safety net in the case where the new backend fails or has to be taken
offline due to some unforeseen problems.

Because all reads occur from the active backend, data needs to be manually migrated from the old backend to the new,
before bringing the shadow backend online. This means there will be some downtime incurred during the transition.

Supported Backends
------------------
This currently supports SQL, HDFS, S3, and testfs backends.

Note that this backend is only enabled as a tag datastore for the build-index. However this could be used as the blob
datastore for the origin servers, provided it is not configured to use the SQL backend in active or shadow modes.

Configuration
-------------
The config has two required items "active_backend" and "shadow_backend", each of which function the same as the
standard "backend" in that the backend type and normal configuration is defined under each, depending on which
role that backend will play. For example, here is a configuration that uses the SQL backend as the active, and
the testfs backend as the shadow:
```yaml
backends:
- namespace: .*
backend:
shadow:
active_backend:
sql:
dialect: mysql
connection_string: "kraken:kraken@tcp(kraken-mysql:3306)/kraken?parseTime=True"
debug_logging: true
shadow_backend:
testfs:
addr: localhost:7357
root: tags
name_path: docker_tag
```
224 changes: 224 additions & 0 deletions lib/backend/shadowbackend/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
// Copyright (c) 2016-2020 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package shadowbackend

import (
"errors"
"fmt"
"io"

"github.com/uber/kraken/core"
"github.com/uber/kraken/lib/backend"
"github.com/uber/kraken/lib/backend/backenderrors"
"github.com/uber/kraken/lib/backend/hdfsbackend"
"github.com/uber/kraken/lib/backend/s3backend"
"github.com/uber/kraken/lib/backend/sqlbackend"
"github.com/uber/kraken/lib/backend/testfs"
"github.com/uber/kraken/utils/log"
"gopkg.in/yaml.v2"
)

type factory struct{}

func (f *factory) Name() string {
return "shadow"
}

func (f *factory) Create(
confRaw interface{}, authConfRaw interface{}) (backend.Client, error) {

confBytes, err := yaml.Marshal(confRaw)
if err != nil {
return nil, fmt.Errorf("marshal shadow config: %v", err)
}

var config Config
if err := yaml.Unmarshal(confBytes, &config); err != nil {
return nil, fmt.Errorf("unmarshal shadow config: %v", err)
}
return NewClient(config, authConfRaw)
}

// Client implements a backend.Client for shadow mode. See the README for full details on what shadow mode means.
type Client struct {
cfg Config
active backend.Client
shadow backend.Client
}

// Option allows setting optional Client parameters.
type Option func(*Client)

// NewClient creates a new shadow Client
func NewClient(config Config, authConfRaw interface{}) (*Client, error) {
a, err := getBackendClient(config.ActiveClientConfig, authConfRaw)
if err != nil {
return nil, err
}

s, err := getBackendClient(config.ShadowClientConfig, authConfRaw)
if err != nil {
return nil, err
}

return &Client{
cfg: config,
active: a,
shadow: s,
}, nil
}

func getBackendClient(backendConfig map[string]interface{}, authConfRaw interface{}) (backend.Client, error) {
var name string
var confRaw interface{}

// TODO Re-implementing all the factory functions is bad form, but because backends.getFactory isn't public there
// is no way to access them currently. Opened https://github.com/uber/kraken/issues/213 to address this.
for name, confRaw = range backendConfig {
switch name {
case "sql":
confBytes, err := yaml.Marshal(confRaw)
if err != nil {
return nil, fmt.Errorf("marshal sql config: %s", err)
}
var config sqlbackend.Config
if err := yaml.Unmarshal(confBytes, &config); err != nil {
return nil, fmt.Errorf("unmarshal sql config: %s", err)
}
authConfBytes, err := yaml.Marshal(authConfRaw)
var userAuth sqlbackend.UserAuthConfig
if err := yaml.Unmarshal(authConfBytes, &userAuth); err != nil {
return nil, fmt.Errorf("unmarshal sql auth config: %s", err)
}
return sqlbackend.NewClient(config, userAuth)
case "hdfs":
confBytes, err := yaml.Marshal(confRaw)
if err != nil {
return nil, fmt.Errorf("marshal hdfs config: %s", err)
}

var config hdfsbackend.Config
if err := yaml.Unmarshal(confBytes, &config); err != nil {
return nil, fmt.Errorf("unmarshal hdfs config: %s", err)
}

return hdfsbackend.NewClient(config)
case "s3":
confBytes, err := yaml.Marshal(confRaw)
if err != nil {
return nil, fmt.Errorf("marshal s3 config: %s", err)
}

var config s3backend.Config
if err := yaml.Unmarshal(confBytes, &config); err != nil {
return nil, fmt.Errorf("unmarshal s3 config: %s", err)
}
authConfBytes, err := yaml.Marshal(authConfRaw)
var userAuth s3backend.UserAuthConfig
if err := yaml.Unmarshal(authConfBytes, &userAuth); err != nil {
return nil, fmt.Errorf("unmarshal s3 auth config: %s", err)
}

return s3backend.NewClient(config, userAuth)
case "testfs":
confBytes, err := yaml.Marshal(confRaw)
if err != nil {
return nil, fmt.Errorf("marshal testfs config: %s", err)
}

var config testfs.Config
if err := yaml.Unmarshal(confBytes, &config); err != nil {
return nil, fmt.Errorf("unmarshal testfs config: %s", err)
}

return testfs.NewClient(config)
default:
return nil, fmt.Errorf("unsupported backend type '%s'", name)
}
}

return nil, nil
}

func isNotFoundErr(err error) bool {
return err != nil && err == backenderrors.ErrBlobNotFound
}

// Stat returns a non-nil core.BlobInfo struct if the data exists, an error otherwise.
func (c *Client) Stat(namespace string, name string) (*core.BlobInfo, error) {
// read from both, fail if error from either
res, errA := c.active.Stat(namespace, name)
_, errS := c.shadow.Stat(namespace, name)

if isNotFoundErr(errA) && isNotFoundErr(errS) {
return nil, backenderrors.ErrBlobNotFound
}

if errA != nil || errS != nil {
if errA != nil && errS == nil {
log.Errorf("[Stat] error getting %s for namespace '%s' from active backend: %v", name, namespace, errA)
return nil, errA
}

if errS != nil && errA == nil {
log.Errorf("[Stat] error getting %s for namespace '%s' from shadow backend: %v", name, namespace, errS)
return nil, errS
}

return nil, fmt.Errorf("[Stat] error in both backends for %s in namespace '%s'. active: '%v', shadow: '%v'", name, namespace, errA, errS)
}

return res, nil
}

// Download gets the data from the backend and then writes it to the output writer.
func (c *Client) Download(namespace string, name string, dst io.Writer) error {
err := c.active.Download(namespace, name, dst)
return err
}

// Upload upserts the data into the backend.
func (c *Client) Upload(namespace string, name string, src io.Reader) error {
rs, ok := src.(io.ReadSeeker)
if !ok {
return errors.New("refusing upload: src does not implement io.Seeker")
}

// write to both, fail if write fails for any
err := c.active.Upload(namespace, name, rs)
if err != nil {
return err
}

// Need to rewind the ReadSeeker here before the second upload
if _, err := rs.Seek(0, io.SeekStart); err != nil {
return err
}

err = c.shadow.Upload(namespace, name, rs)
if err != nil {
return err
}

return nil
}

// List lists names with start with prefix.
func (c *Client) List(prefix string, opts ...backend.ListOption) (*backend.ListResult, error) {
res, err := c.active.List(prefix, opts...)
if err != nil {
return nil, err
}
return res, nil
}

0 comments on commit 645466c

Please sign in to comment.