Skip to content

Commit 6b031e5

Browse files
authored
Merge pull request kubernetes#128713 from tallclair/ippr-debug-events
[FG:InPlacePodVerticalScaling] Emit events for Deferred and Infeasible statuses
2 parents f5d1fdf + 3a2555e commit 6b031e5

File tree

2 files changed

+35
-15
lines changed

2 files changed

+35
-15
lines changed

pkg/kubelet/events/event.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ const (
3434
FailedToCreatePodContainer = "FailedCreatePodContainer"
3535
FailedToMakePodDataDirectories = "Failed"
3636
NetworkNotReady = "NetworkNotReady"
37+
ResizeDeferred = "ResizeDeferred"
38+
ResizeInfeasible = "ResizeInfeasible"
3739
)
3840

3941
// Image event reason list

pkg/kubelet/kubelet.go

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ import (
119119
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
120120
"k8s.io/kubernetes/pkg/kubelet/userns"
121121
"k8s.io/kubernetes/pkg/kubelet/util"
122+
"k8s.io/kubernetes/pkg/kubelet/util/format"
122123
"k8s.io/kubernetes/pkg/kubelet/util/manager"
123124
"k8s.io/kubernetes/pkg/kubelet/util/queue"
124125
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
@@ -2838,34 +2839,47 @@ func isPodResizeInProgress(pod *v1.Pod, podStatus *kubecontainer.PodStatus) bool
28382839
// canResizePod determines if the requested resize is currently feasible.
28392840
// pod should hold the desired (pre-allocated) spec.
28402841
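// Returns true if the resize can proceed, plus the resize status to set (if any) and, when the resize is deferred or infeasible, a message explaining why.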
2841-
func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus) {
2842+
func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus, string) {
2843+
if goos == "windows" {
2844+
return false, v1.PodResizeStatusInfeasible, "Resizing Windows pods is not supported"
2845+
}
2846+
28422847
if v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed && !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScalingExclusiveCPUs) {
28432848
if utilfeature.DefaultFeatureGate.Enabled(features.CPUManager) {
28442849
if kl.containerManager.GetNodeConfig().CPUManagerPolicy == "static" {
2845-
klog.V(3).InfoS("Resize is infeasible for Guaranteed Pods alongside CPU Manager static policy")
2846-
return false, v1.PodResizeStatusInfeasible
2850+
msg := "Resize is infeasible for Guaranteed Pods alongside CPU Manager static policy"
2851+
klog.V(3).InfoS(msg, "pod", format.Pod(pod))
2852+
return false, v1.PodResizeStatusInfeasible, msg
28472853
}
28482854
}
28492855
if utilfeature.DefaultFeatureGate.Enabled(features.MemoryManager) {
28502856
if kl.containerManager.GetNodeConfig().ExperimentalMemoryManagerPolicy == "static" {
2851-
klog.V(3).InfoS("Resize is infeasible for Guaranteed Pods alongside Memory Manager static policy")
2852-
return false, v1.PodResizeStatusInfeasible
2857+
msg := "Resize is infeasible for Guaranteed Pods alongside Memory Manager static policy"
2858+
klog.V(3).InfoS(msg, "pod", format.Pod(pod))
2859+
return false, v1.PodResizeStatusInfeasible, msg
28532860
}
28542861
}
28552862
}
28562863

28572864
node, err := kl.getNodeAnyWay()
28582865
if err != nil {
28592866
klog.ErrorS(err, "getNodeAnyway function failed")
2860-
return false, ""
2867+
return false, "", ""
28612868
}
28622869
cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
28632870
memAvailable := node.Status.Allocatable.Memory().Value()
28642871
cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
28652872
memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
28662873
if cpuRequests > cpuAvailable || memRequests > memAvailable {
2867-
klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", klog.KObj(pod))
2868-
return false, v1.PodResizeStatusInfeasible
2874+
var msg string
2875+
if memRequests > memAvailable {
2876+
msg = fmt.Sprintf("memory, requested: %d, capacity: %d", memRequests, memAvailable)
2877+
} else {
2878+
msg = fmt.Sprintf("cpu, requested: %d, capacity: %d", cpuRequests, cpuAvailable)
2879+
}
2880+
msg = "Node didn't have enough capacity: " + msg
2881+
klog.V(3).InfoS(msg, "pod", klog.KObj(pod))
2882+
return false, v1.PodResizeStatusInfeasible, msg
28692883
}
28702884

28712885
// Treat the existing pod needing resize as a new pod with desired resources seeking admit.
@@ -2876,10 +2890,10 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus) {
28762890
if ok, failReason, failMessage := kl.canAdmitPod(allocatedPods, pod); !ok {
28772891
// Log reason and return. Let the next sync iteration retry the resize
28782892
klog.V(3).InfoS("Resize cannot be accommodated", "pod", klog.KObj(pod), "reason", failReason, "message", failMessage)
2879-
return false, v1.PodResizeStatusDeferred
2893+
return false, v1.PodResizeStatusDeferred, failMessage
28802894
}
28812895

2882-
return true, v1.PodResizeStatusInProgress
2896+
return true, v1.PodResizeStatusInProgress, ""
28832897
}
28842898

28852899
// handlePodResourcesResize returns the "allocated pod", which should be used for all resource
@@ -2900,15 +2914,11 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
29002914
// Pod allocation does not need to be updated.
29012915
return allocatedPod, nil
29022916
}
2903-
if goos == "windows" {
2904-
kl.statusManager.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInfeasible)
2905-
return allocatedPod, nil
2906-
}
29072917

29082918
kl.podResizeMutex.Lock()
29092919
defer kl.podResizeMutex.Unlock()
29102920
// Desired resources != allocated resources. Can we update the allocation to the desired resources?
2911-
fit, resizeStatus := kl.canResizePod(pod)
2921+
fit, resizeStatus, resizeMsg := kl.canResizePod(pod)
29122922
if fit {
29132923
// Update pod resource allocation checkpoint
29142924
if err := kl.statusManager.SetPodAllocation(pod); err != nil {
@@ -2934,6 +2944,14 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
29342944
}
29352945
if resizeStatus != "" {
29362946
kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus)
2947+
if resizeMsg != "" {
2948+
switch resizeStatus {
2949+
case v1.PodResizeStatusDeferred:
2950+
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.ResizeDeferred, resizeMsg)
2951+
case v1.PodResizeStatusInfeasible:
2952+
kl.recorder.Eventf(pod, v1.EventTypeWarning, events.ResizeInfeasible, resizeMsg)
2953+
}
2954+
}
29372955
}
29382956
return allocatedPod, nil
29392957
}

0 commit comments

Comments
 (0)