Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

Commit

Permalink
Harden default creation for parallel invocation
Browse files Browse the repository at this point in the history
If multiple build commands are run in parallel without a builder already
existing, there are race conditions where those default creations can
get confused and cause builds to fail
This adds a new test case to verify parallel builds work, and hardens
the bootstrapping code to detect "already exists" errors and retry
  • Loading branch information
Daniel Hiltgen committed Dec 17, 2020
1 parent f9f7fe9 commit 714c034
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 5 deletions.
1 change: 0 additions & 1 deletion Makefile
Expand Up @@ -93,4 +93,3 @@ cover.html: cover-int.out cover-unit.out
.PHONY: lint
lint:
golangci-lint run

4 changes: 3 additions & 1 deletion integration/common/kubeclient.go
Expand Up @@ -62,7 +62,9 @@ func RunSimpleBuildImageAsPod(ctx context.Context, name, imageName, namespace st

defer func() {
err := podClient.Delete(ctx, pod.Name, metav1.DeleteOptions{})
logrus.Warnf("failed to clean up pod %s: %s", pod.Name, err)
if err != nil {
logrus.Warnf("failed to clean up pod %s: %s", pod.Name, err)
}
}()

logrus.Infof("waiting for pod to start...")
Expand Down
92 changes: 92 additions & 0 deletions integration/suites/parallel_default_test.go
@@ -0,0 +1,92 @@
// Copyright (C) 2020 VMware, Inc.
// SPDX-License-Identifier: Apache-2.0
package suites

import (
"context"
"fmt"
"sync"
"testing"

"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"github.com/vmware-tanzu/buildkit-cli-for-kubectl/integration/common"

"k8s.io/client-go/kubernetes"
v1 "k8s.io/client-go/kubernetes/typed/core/v1"
)

const ParallelDefaultBuildCount = 3

type parallelDefaultSuite struct {
suite.Suite
Name string
CreateFlags []string

ClientSet *kubernetes.Clientset
Namespace string

configMapClient v1.ConfigMapInterface
}

func (s *parallelDefaultSuite) SetupSuite() {
var err error
s.ClientSet, s.Namespace, err = common.GetKubeClientset()
require.NoError(s.T(), err, "%s: kube client failed", s.Name)
s.configMapClient = s.ClientSet.CoreV1().ConfigMaps(s.Namespace)
}

func (s *parallelDefaultSuite) TestParallelDefaultBuilds() {
logrus.Infof("%s: Parallel %d Build", s.Name, ParallelDefaultBuildCount)

dirs := make([]string, ParallelDefaultBuildCount)
errors := make([]error, ParallelDefaultBuildCount)

// Create the contexts before threading
for i := 0; i < ParallelDefaultBuildCount; i++ {
dir, cleanup, err := common.NewSimpleBuildContext()
dirs[i] = dir
require.NoError(s.T(), err, "Failed to set up temporary build context")
defer cleanup()
}
wg := &sync.WaitGroup{}
wg.Add(ParallelDefaultBuildCount)

for i := 0; i < ParallelDefaultBuildCount; i++ {
go func(i int) {
defer wg.Done()
imageName := fmt.Sprintf("dummy.acme.com/pbuild:%d", i)
args := []string{
"--progress=plain",
"--tag", imageName,
dirs[i],
}
err := common.RunBuild(args)
if err != nil {
errors[i] = err
return
}
errors[i] = common.RunSimpleBuildImageAsPod(
context.Background(),
fmt.Sprintf("%s-testbuiltimage-%d", s.Name, i),
imageName,
s.Namespace,
s.ClientSet,
)

}(i)
}
wg.Wait()
for i := 0; i < ParallelDefaultBuildCount; i++ {
require.NoError(s.T(), errors[i], "build/run %d failed", i)
}
}

func TestParallelDefaultBuildSuite(t *testing.T) {
common.Skipper(t)
// We don't parallelize with other tests, since we use the default builder name
suite.Run(t, &parallelDefaultSuite{
Name: "buildkit",
})
}
15 changes: 12 additions & 3 deletions pkg/driver/driver.go
Expand Up @@ -6,6 +6,8 @@ import (
"context"
"io"
"net"
"strings"
"time"

"github.com/moby/buildkit/client"
"github.com/moby/buildkit/session"
Expand Down Expand Up @@ -33,6 +35,8 @@ const (
Stopped
)

const maxBootRetries = 3

func (s Status) String() string {
switch s {
case Inactive:
Expand Down Expand Up @@ -98,21 +102,26 @@ func Boot(ctx context.Context, d Driver, pw progress.Writer) (*client.Client, st
}
try++
if info.Status != Running {
if try > 2 {
if try > maxBootRetries {
return nil, "", errors.Errorf("failed to bootstrap %T driver in attempts", d)
}
if err := d.Bootstrap(ctx, func(s *client.SolveStatus) {
if pw != nil {
pw.Status() <- s
}
}); err != nil {
}); err != nil && (strings.Contains(err.Error(), "already exists") || strings.Contains(err.Error(), "not found")) {
// This most likely means another build is running in parallel
// Give it just enough time to finish creating resources then retry
time.Sleep(250 * time.Millisecond)
continue
} else if err != nil {
return nil, "", err
}
}

c, chosenNodeName, err := d.Client(ctx)
if err != nil {
if errors.Cause(err) == ErrNotRunning && try <= 2 {
if errors.Cause(err) == ErrNotRunning && try <= maxBootRetries {
continue
}
return nil, "", err
Expand Down

0 comments on commit 714c034

Please sign in to comment.