-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpod_failure_policy.go
129 lines (118 loc) · 5.04 KB
/
pod_failure_policy.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package job
import (
"fmt"
batch "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
)
// matchPodFailurePolicy evaluates the rules of podFailurePolicy, in order,
// against failedPod and reports the outcome of the first rule that matches
// with a recognized action. It returns three values:
//   - an optional job failure message, non-nil only for a 'FailJob' match;
//   - whether the failure counts towards backoffLimit (false only for an
//     'Ignore' match);
//   - a pointer to the matched action, or nil when nothing matched.
//
// A nil policy, or a policy with no matching rule, yields (nil, true, nil).
// A rule that sets OnExitCodes is evaluated on exit codes only; its
// OnPodConditions are consulted only when OnExitCodes is nil.
func matchPodFailurePolicy(podFailurePolicy *batch.PodFailurePolicy, failedPod *v1.Pod) (*string, bool, *batch.PodFailurePolicyAction) {
	if podFailurePolicy == nil {
		return nil, true, nil
	}
	// Addressable copies of the action constants, used for the returned pointer.
	var (
		actionIgnore  = batch.PodFailurePolicyActionIgnore
		actionCount   = batch.PodFailurePolicyActionCount
		actionFailJob = batch.PodFailurePolicyActionFailJob
	)
	for ruleIdx, rule := range podFailurePolicy.Rules {
		switch {
		case rule.OnExitCodes != nil:
			matched := matchOnExitCodes(&failedPod.Status, rule.OnExitCodes)
			if matched == nil {
				continue
			}
			// A match with an action not listed below falls through to the next rule.
			switch rule.Action {
			case batch.PodFailurePolicyActionIgnore:
				return nil, false, &actionIgnore
			case batch.PodFailurePolicyActionCount:
				return nil, true, &actionCount
			case batch.PodFailurePolicyActionFailJob:
				failMsg := fmt.Sprintf("Container %s for pod %s/%s failed with exit code %v matching %v rule at index %d",
					matched.Name, failedPod.Namespace, failedPod.Name, matched.State.Terminated.ExitCode, rule.Action, ruleIdx)
				return &failMsg, true, &actionFailJob
			}
		case rule.OnPodConditions != nil:
			matched := matchOnPodConditions(&failedPod.Status, rule.OnPodConditions)
			if matched == nil {
				continue
			}
			// A match with an action not listed below falls through to the next rule.
			switch rule.Action {
			case batch.PodFailurePolicyActionIgnore:
				return nil, false, &actionIgnore
			case batch.PodFailurePolicyActionCount:
				return nil, true, &actionCount
			case batch.PodFailurePolicyActionFailJob:
				failMsg := fmt.Sprintf("Pod %s/%s has condition %v matching %v rule at index %d",
					failedPod.Namespace, failedPod.Name, matched.Type, rule.Action, ruleIdx)
				return &failMsg, true, &actionFailJob
			}
		}
	}
	return nil, true, nil
}
// matchOnExitCodes looks for a terminated container whose exit code satisfies
// the requirement. The pod's regular container statuses are searched first;
// init container statuses are searched only if none of those match.
// A non-nil result always has a non-nil State.Terminated field.
func matchOnExitCodes(podStatus *v1.PodStatus, requirement *batch.PodFailurePolicyOnExitCodesRequirement) *v1.ContainerStatus {
	matched := getMatchingContainerFromList(podStatus.ContainerStatuses, requirement)
	if matched == nil {
		matched = getMatchingContainerFromList(podStatus.InitContainerStatuses, requirement)
	}
	return matched
}
// matchOnPodConditions returns the first pod condition, in the order listed in
// podStatus.Conditions, whose Type and Status both equal those of any pattern
// in requirement. It returns nil if no condition matches. The result points to
// a copy of the matched condition.
func matchOnPodConditions(podStatus *v1.PodStatus, requirement []batch.PodFailurePolicyOnPodConditionsPattern) *v1.PodCondition {
	for i := range podStatus.Conditions {
		// Work on a copy so the returned pointer does not alias the pod status.
		cond := podStatus.Conditions[i]
		for j := range requirement {
			if cond.Type != requirement[j].Type || cond.Status != requirement[j].Status {
				continue
			}
			return &cond
		}
	}
	return nil
}
// getMatchingContainerFromList scans containerStatuses in order and returns the
// first status that is terminated with a non-zero exit code, belongs to the
// container named by the requirement (any container when ContainerName is nil),
// and whose exit code satisfies the requirement's operator. It returns nil if
// no status qualifies. A non-nil result always has a non-nil State.Terminated
// field and points to a copy of the matched status.
func getMatchingContainerFromList(containerStatuses []v1.ContainerStatus, requirement *batch.PodFailurePolicyOnExitCodesRequirement) *v1.ContainerStatus {
	for i := range containerStatuses {
		// Work on a copy so the returned pointer does not alias the input slice.
		status := containerStatuses[i]
		terminated := status.State.Terminated
		if terminated == nil {
			// The container has not terminated, so there is no exit code to match.
			continue
		}
		if requirement.ContainerName != nil && *requirement.ContainerName != status.Name {
			continue
		}
		if terminated.ExitCode == 0 {
			// A zero exit code is never treated as a match.
			continue
		}
		if isOnExitCodesOperatorMatching(terminated.ExitCode, requirement) {
			return &status
		}
	}
	return nil
}
// isOnExitCodesOperatorMatching reports whether exitCode satisfies the
// requirement: for the In operator the code must be one of requirement.Values,
// for the NotIn operator it must be absent from them. Any other operator
// never matches.
func isOnExitCodesOperatorMatching(exitCode int32, requirement *batch.PodFailurePolicyOnExitCodesRequirement) bool {
	op := requirement.Operator
	if op != batch.PodFailurePolicyOnExitCodesOpIn && op != batch.PodFailurePolicyOnExitCodesOpNotIn {
		return false
	}
	listed := false
	for i := range requirement.Values {
		if requirement.Values[i] == exitCode {
			listed = true
			break
		}
	}
	// In matches when the code is listed; NotIn matches when it is not.
	return listed == (op == batch.PodFailurePolicyOnExitCodesOpIn)
}