Skip to content

Commit

Permalink
issue fix 4874 update
Browse files Browse the repository at this point in the history
Signed-off-by: Lyndon-Li <lyonghui@vmware.com>
  • Loading branch information
Lyndon-Li committed Sep 13, 2022
1 parent 30a70cb commit fbb2606
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 22 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/5319-lyndon
@@ -0,0 +1 @@
Fix issues 4874 and 4752: check that the daemonset pod is running on the node where the workload pod resides before running the PVB for the pod
6 changes: 3 additions & 3 deletions pkg/cmd/server/server.go
Expand Up @@ -533,11 +533,11 @@ var defaultRestorePriorities = []string{
}

func (s *server) initRestic() error {
// warn if restic daemonset does not exist
// warn if node agent does not exist
if err := nodeagent.IsRunning(s.ctx, s.kubeClient, s.namespace); err == nodeagent.DaemonsetNotFound {
s.logger.Warn("Velero restic daemonset not found; restic backups/restores will not work until it's created")
s.logger.Warn("Velero node agent not found; pod volume backups/restores will not work until it's created")
} else if err != nil {
s.logger.WithError(errors.WithStack(err)).Warn("Error checking for existence of velero restic daemonset")
s.logger.WithError(errors.WithStack(err)).Warn("Error checking for existence of velero node agent")
}

// ensure the repo key secret is set up
Expand Down
12 changes: 9 additions & 3 deletions pkg/nodeagent/node_agent.go
Expand Up @@ -23,13 +23,15 @@ import (
"github.com/pkg/errors"
"k8s.io/client-go/kubernetes"

"github.com/vmware-tanzu/velero/pkg/util/kube"

apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
)

const (
// daemonSet is the name of the Velero restic daemonset.
// daemonSet is the name of the Velero node agent daemonset.
daemonSet = "restic"
)

Expand All @@ -48,7 +50,7 @@ func IsRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace s
}
}

// IsRunningInNode checks if the node agent daemonset pod is running properly in a specified node. If not, return the error found
// IsRunningInNode checks if the node agent pod is running properly in a specified node. If not, return the error found
func IsRunningInNode(ctx context.Context, namespace string, nodeName string, podClient corev1client.PodsGetter) error {
if nodeName == "" {
return errors.New("node name is empty")
Expand All @@ -60,10 +62,14 @@ func IsRunningInNode(ctx context.Context, namespace string, nodeName string, pod
}

for _, pod := range pods.Items {
if kube.IsPodRunning(&pod) != nil {
continue
}

if pod.Spec.NodeName == nodeName {
return nil
}
}

return errors.Errorf("daemonset pod not found in node %s", nodeName)
return errors.Errorf("daemonset pod not found in running state in node %s", nodeName)
}
3 changes: 2 additions & 1 deletion pkg/podvolume/backupper.go
Expand Up @@ -35,6 +35,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/nodeagent"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/util/boolptr"
"github.com/vmware-tanzu/velero/pkg/util/kube"
)

// Backupper can execute restic backups of volumes in a pod.
Expand Down Expand Up @@ -125,7 +126,7 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api.
return nil, []error{err}
}

err = IsPodQualified(pod)
err = kube.IsPodRunning(pod)
if err != nil {
return nil, []error{err}
}
Expand Down
15 changes: 0 additions & 15 deletions pkg/podvolume/util.go
Expand Up @@ -22,7 +22,6 @@ import (
corev1api "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/pkg/errors"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/repository"
"github.com/vmware-tanzu/velero/pkg/uploader"
Expand Down Expand Up @@ -296,17 +295,3 @@ func GetPodVolumesUsingRestic(pod *corev1api.Pod, defaultVolumesToRestic bool) [
}
return podVolumes
}

// IsPodQualified reports whether a pod is in a state that allows a PVB/PVR to
// back up or restore its volumes. The pod must have a node assigned and be in
// the Running phase; otherwise an error naming the first failed check is
// returned. A nil return means the pod is qualified.
func IsPodQualified(pod *corev1api.Pod) error {
	phase := pod.Status.Phase

	switch {
	case pod.Spec.NodeName == "":
		// No node assigned yet, so there is nowhere to run the backup/restore.
		return errors.Errorf("pod is not scheduled, name=%s, namespace=%s, status=%s", pod.Name, pod.Namespace, phase)
	case phase != corev1api.PodRunning:
		return errors.Errorf("pod is not running, name=%s, namespace=%s, status=%s", pod.Name, pod.Namespace, phase)
	}

	return nil
}
39 changes: 39 additions & 0 deletions pkg/util/kube/pod.go
@@ -0,0 +1,39 @@
/*
Copyright The Velero Contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kube

import (
"github.com/pkg/errors"
corev1api "k8s.io/api/core/v1"
)

// IsPodRunning verifies that the given pod is running stably. Three conditions
// are checked in order: the pod has a node assigned, its phase is Running, and
// it has no deletion timestamp set. It returns nil when all three hold;
// otherwise it returns an error describing the first condition that failed.
func IsPodRunning(pod *corev1api.Pod) error {
	name, namespace, phase := pod.Name, pod.Namespace, pod.Status.Phase

	switch {
	case pod.Spec.NodeName == "":
		return errors.Errorf("pod is not scheduled, name=%s, namespace=%s, phase=%s", name, namespace, phase)
	case phase != corev1api.PodRunning:
		return errors.Errorf("pod is not running, name=%s, namespace=%s, phase=%s", name, namespace, phase)
	case pod.DeletionTimestamp != nil:
		// A deletion timestamp can be set while the phase is still Running.
		return errors.Errorf("pod is being terminated, name=%s, namespace=%s, phase=%s", name, namespace, phase)
	}

	return nil
}

0 comments on commit fbb2606

Please sign in to comment.