Fix: cooldown reset on pod restart #8057

Open · MenD32 wants to merge 34 commits into base: master from fix/max-deletion-candidate-staleness

Changes from all commits · 34 commits
f31f6ac
feat: added MaxDeletionCandidateStaleness
MenD32 May 17, 2025
0af55ac
refactor: added description comments for public functions
MenD32 May 17, 2025
be3a1b9
fix: PR comments
MenD32 Jun 5, 2025
98949f0
fix: PR comments
MenD32 Jun 5, 2025
5664908
fix: PR comments
MenD32 Jun 5, 2025
6590d55
fix: PR comments
MenD32 Jun 5, 2025
2835740
tests: debugging TestStaticAutoscalerRunOnceWithExistingDeletionCandi…
MenD32 Jun 6, 2025
32fafcf
Merge branch 'master' into fix/max-deletion-candidate-staleness
MenD32 Jun 6, 2025
387d146
tests: debugging TestStaticAutoscalerRunOnceWithExistingDeletionCandi…
MenD32 Jun 6, 2025
714bc68
tests: debugging TestStaticAutoscalerRunOnceWithExistingDeletionCandi…
MenD32 Jun 6, 2025
531feb3
Merge branch 'master' into fix/max-deletion-candidate-staleness
MenD32 Jun 6, 2025
0203ab6
Merge branch 'master' into fix/max-deletion-candidate-staleness
MenD32 Jun 11, 2025
3a1ae27
fix: changed maxDeletionCandidateStaleness to maxDeletionCandidateTTL…
MenD32 Jun 26, 2025
422d17f
fix: typo in function name updateInternalState
MenD32 Jun 26, 2025
30b5cdb
fix: cleaned up unneeded nodes loading
MenD32 Jun 26, 2025
4b09315
fix: cleared up taint staleness log
MenD32 Jun 26, 2025
3f7b299
fix: removed old comment
MenD32 Jun 26, 2025
412fb48
fix: fixed log on node skip due to stale deleltionCandidate taint
MenD32 Jun 27, 2025
837c330
fix: changed log from plural to singular
MenD32 Jun 27, 2025
899abd6
refactor: removed old flag naming maxDeletionCandidateTTL -> nodeDele…
MenD32 Jun 27, 2025
a27cd62
fix: simplified CleanAllTaints' logic of ignoring nodes without taintKey
MenD32 Jun 27, 2025
f5e1172
fix: moved taint cleaning from static_autoscaler initialization to sc…
MenD32 Jun 27, 2025
b4c3661
fix: gofmt on flags.go
MenD32 Jun 27, 2025
43b5ba9
fix: add check to planner.New() that allnodelister isn't nil
MenD32 Jun 28, 2025
b8890bf
fix: add check to planner.New() that allnodelister isn't nil
MenD32 Jun 28, 2025
713b9c3
fix: taint deleting made conditional on non-default deletionCandidate…
MenD32 Jun 28, 2025
95bc665
tests: moved tests of deletionCandidate taint management from static_…
MenD32 Jun 28, 2025
4d790ba
tests: moved tests of deletionCandidate taint management from static_…
MenD32 Jun 28, 2025
f63a66a
tests: readded TestStaticAutoscalerRunOnceWithExistingDeletionCandida…
MenD32 Jul 2, 2025
ddf3d0a
lint: added comment to NewDynamicTestNodeLister
MenD32 Jul 2, 2025
5f818ca
lint: added comment to NewDynamicTestNodeLister
MenD32 Jul 2, 2025
de0f705
fix: added DeletionCandidateStalenessTTL on new nodes when intializin…
MenD32 Jul 18, 2025
10e656f
fix: build error on unneeded/nodes_test.go
MenD32 Jul 18, 2025
7d15006
fix(tests): bad DeletionCandidateStalenessTTL in TestNodeLoadFromExis…
MenD32 Jul 18, 2025
3 changes: 3 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
@@ -338,6 +338,9 @@ type AutoscalingOptions struct {
ProactiveScaleupEnabled bool
// PodInjectionLimit limits total number of pods while injecting fake pods.
PodInjectionLimit int
// NodeDeletionCandidateTTL is the maximum time a node can be marked as removable without being deleted.
// This is used to prevent nodes from being stuck in the removable state if the CA deployment becomes inactive.
NodeDeletionCandidateTTL time.Duration
}

// KubeClientOptions specify options for kube client
2 changes: 2 additions & 0 deletions cluster-autoscaler/config/flags/flags.go
@@ -226,6 +226,7 @@ var (
enableDynamicResourceAllocation = flag.Bool("enable-dynamic-resource-allocation", false, "Whether logic for handling DRA (Dynamic Resource Allocation) objects is enabled.")
clusterSnapshotParallelism = flag.Int("cluster-snapshot-parallelism", 16, "Maximum parallelism of cluster snapshot creation.")
checkCapacityProcessorInstance = flag.String("check-capacity-processor-instance", "", "Name of the processor instance. Only ProvisioningRequests that define this name in their parameters with the key \"processorInstance\" will be processed by this CA instance. It only refers to check capacity ProvisioningRequests, but if not empty, best-effort atomic ProvisioningRequests processing is disabled in this instance. Not recommended: Until CA 1.35, ProvisioningRequests with this name as prefix in their class will be also processed.")
nodeDeletionCandidateTTL = flag.Duration("node-deletion-candidate-ttl", time.Duration(0), "Maximum time a node can be marked as removable before the marking becomes stale. This sets the TTL of the Cluster-Autoscaler's state in case the Cluster-Autoscaler deployment becomes inactive.")

// Deprecated flags
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group (Deprecated, use startup-taints instead)")
@@ -406,6 +407,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
NodeInfoCacheExpireTime: *nodeInfoCacheExpireTime,
ProactiveScaleupEnabled: *proactiveScaleupEnabled,
PodInjectionLimit: *podInjectionLimit,
NodeDeletionCandidateTTL: *nodeDeletionCandidateTTL,
}
}
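
For reference, a minimal standalone sketch of how the new flag behaves: the zero default leaves the feature off, and any non-zero duration turns on reloading of existing DeletionCandidate taints on startup. Only the flag name and its zero default come from this diff; the program itself is illustrative.

package main

import (
	"flag"
	"fmt"
)

func main() {
	// Mirrors the flag added above; the default of 0 keeps the feature disabled.
	ttl := flag.Duration("node-deletion-candidate-ttl", 0, "Maximum time a node can be marked as removable before the marking becomes stale.")
	flag.Parse()

	if *ttl == 0 {
		fmt.Println("DeletionCandidate taints are not reloaded on restart (default)")
		return
	}
	fmt.Printf("DeletionCandidate taints newer than %s are reloaded on restart\n", *ttl)
}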

8 changes: 7 additions & 1 deletion cluster-autoscaler/core/scaledown/planner/planner.go
@@ -85,10 +85,16 @@ func New(context *context.AutoscalingContext, processors *processors.Autoscaling
if minUpdateInterval == 0*time.Nanosecond {
minUpdateInterval = 1 * time.Nanosecond
}

unneededNodes := unneeded.NewNodes(processors.NodeGroupConfigProcessor, resourceLimitsFinder)
if context.AutoscalingOptions.NodeDeletionCandidateTTL != 0 {
unneededNodes.LoadFromExistingTaints(context.ListerRegistry, time.Now(), context.AutoscalingOptions.NodeDeletionCandidateTTL)
}

return &Planner{
context: context,
unremovableNodes: unremovable.NewNodes(),
unneededNodes: unneeded.NewNodes(processors.NodeGroupConfigProcessor, resourceLimitsFinder),
unneededNodes: unneededNodes,
rs: simulator.NewRemovalSimulator(context.ListerRegistry, context.ClusterSnapshot, deleteOptions, drainabilityRules, true),
actuationInjector: scheduling.NewHintingSimulator(),
eligibilityChecker: eligibility.NewChecker(processors.NodeGroupConfigProcessor),
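
To illustrate the effect of seeding unneededNodes from existing taints above: the node's "unneeded since" timestamp survives a Cluster-Autoscaler restart, so the scale-down cooldown resumes instead of resetting. The helper and the 5m/3m timings below are a hypothetical sketch, not part of this PR.

package main

import (
	"fmt"
	"time"
)

// remainingCooldown is a hypothetical helper showing how much ScaleDownUnneededTime
// is left for a node whose DeletionCandidate taint was applied at taintedAt.
func remainingCooldown(taintedAt, now time.Time, unneededTime time.Duration) time.Duration {
	if rem := unneededTime - now.Sub(taintedAt); rem > 0 {
		return rem
	}
	return 0
}

func main() {
	now := time.Now()
	// A node tainted 3 minutes before a CA restart, with a 5-minute ScaleDownUnneededTime:
	// with the taint reloaded, only ~2 minutes of cooldown remain instead of a full 5.
	fmt.Println(remainingCooldown(now.Add(-3*time.Minute), now, 5*time.Minute))
}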
136 changes: 135 additions & 1 deletion cluster-autoscaler/core/scaledown/planner/planner_test.go
@@ -36,11 +36,14 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/status"
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/unremovable"
. "k8s.io/autoscaler/cluster-autoscaler/core/test"
"k8s.io/autoscaler/cluster-autoscaler/estimator"
processorstest "k8s.io/autoscaler/cluster-autoscaler/processors/test"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
"k8s.io/autoscaler/cluster-autoscaler/simulator/options"
"k8s.io/autoscaler/cluster-autoscaler/simulator/utilization"
"k8s.io/autoscaler/cluster-autoscaler/utils/drain"
"k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
@@ -463,7 +466,7 @@ func TestUpdateClusterState(t *testing.T) {
wantUnremovable: []string{"n1", "n2", "n3", "n4"},
},
{
name: "Simulation timeout is hitted",
MenD32 (Contributor, Author): that is just a typo I noticed, I can revert if that's an issue
name: "Simulation timeout is hit",
nodes: []*apiv1.Node{
BuildTestNode("n1", 1000, 10),
BuildTestNode("n2", 1000, 10),
@@ -706,6 +709,137 @@ func TestUpdateClusterStatUnneededNodesLimit(t *testing.T) {
}
}

// TestNewPlannerWithExistingDeletionCandidateNodes tests that New() correctly handles existing deletion candidate taints on nodes.
func TestNewPlannerWithExistingDeletionCandidateNodes(t *testing.T) {
// Use a table-driven approach where each test case includes its own set of nodes and expected behavior
type testCase struct {
name string
allNodes []*apiv1.Node
expectedDeletionCandidateNodes []*apiv1.Node
nodeDeletionCandidateTTL time.Duration
}

// Common test setup
deletionCandidateTaint := taints.DeletionCandidateTaint()
currentTime := time.Now()

// Node that should be deleted
n1 := BuildTestNode("n1", 1000, 1000)
SetNodeReadyState(n1, true, currentTime)
nt1 := deletionCandidateTaint
ntt1 := currentTime.Add(-time.Minute * 2)
nt1.Value = fmt.Sprint(ntt1.Unix())
n1.Spec.Taints = append(n1.Spec.Taints, nt1)

// Node whose DeletionCandidateTaint has lapsed, shouldn't be deleted
n2 := BuildTestNode("n2", 1000, 1000)
SetNodeReadyState(n2, true, currentTime)
nt2 := deletionCandidateTaint
ntt2 := currentTime.Add(-time.Minute * 10)
nt2.Value = fmt.Sprint(ntt2.Unix())
n2.Spec.Taints = append(n2.Spec.Taints, nt2)

// Node that is marked for deletion, but should have that mark removed
n3 := BuildTestNode("n3", 1000, 1000)
SetNodeReadyState(n3, true, currentTime)
nt3 := deletionCandidateTaint
ntt3 := currentTime.Add(-time.Minute * 2)
nt3.Value = fmt.Sprint(ntt3.Unix())
n3.Spec.Taints = append(n3.Spec.Taints, nt3)

// Node with invalid DeletionCandidateTaint, taint should be deleted
n4 := BuildTestNode("n4", 1000, 1000)
SetNodeReadyState(n4, true, currentTime)
nt4 := deletionCandidateTaint
nt4.Value = "invalid-value"
n4.Spec.Taints = append(n4.Spec.Taints, nt4)

// Node with no DeletionCandidateTaint, should not be deleted
n5 := BuildTestNode("n5", 1000, 1000)
SetNodeReadyState(n5, true, currentTime)

// Pod that blocks eviction on node n3
p1 := BuildTestPod("p1", 600, 100)
p1.Spec.NodeName = n3.Name
p1.SetAnnotations(
map[string]string{
drain.PodSafeToEvictKey: "false",
},
)

testCases := []testCase{
{
name: "All deletion candidate nodes with standard TTL",
allNodes: []*apiv1.Node{n1, n2, n3},
expectedDeletionCandidateNodes: []*apiv1.Node{n1},
nodeDeletionCandidateTTL: time.Minute * 5,
},
{
name: "Node without deletion candidate taint should not be deleted",
allNodes: []*apiv1.Node{n5},
expectedDeletionCandidateNodes: []*apiv1.Node{},
nodeDeletionCandidateTTL: time.Minute * 5,
},
{
name: "Node with invalid deletion candidate taint should be deleted",
allNodes: []*apiv1.Node{n4},
expectedDeletionCandidateNodes: []*apiv1.Node{},
nodeDeletionCandidateTTL: time.Minute * 5,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
readyNodeLister := kubernetes.NewTestNodeLister(nil)
allNodeLister := kubernetes.NewTestNodeLister(nil)

readyNodeLister.SetNodes(tc.allNodes)
allNodeLister.SetNodes(tc.allNodes)

autoscalingOptions := config.AutoscalingOptions{
NodeGroupDefaults: config.NodeGroupAutoscalingOptions{
ScaleDownUnneededTime: time.Minute,
ScaleDownUnreadyTime: time.Minute,
ScaleDownUtilizationThreshold: 0.5,
MaxNodeProvisionTime: 10 * time.Second,
},
EstimatorName: estimator.BinpackingEstimatorName,
EnforceNodeGroupMinSize: true,
ScaleDownEnabled: true,
MaxNodesTotal: 100,
MaxCoresTotal: 100,
MaxMemoryTotal: 100000,
NodeDeletionCandidateTTL: tc.nodeDeletionCandidateTTL,
}

provider := testprovider.NewTestCloudProviderBuilder().Build()
for _, node := range tc.allNodes {
provider.AddNode("ng1", node)
}

context, err := NewScaleTestAutoscalingContext(
autoscalingOptions,
&fake.Clientset{},
kube_util.NewListerRegistry(
allNodeLister,
readyNodeLister,
nil, nil, nil, nil, nil, nil, nil,
),

provider,
nil,
nil,
)
assert.NoError(t, err)

deleteOptions := options.NodeDeleteOptions{}
p := New(&context, processorstest.NewTestProcessors(&context), deleteOptions, nil)

p.unneededNodes.AsList()
})
}
}

func TestNodesToDelete(t *testing.T) {
testCases := []struct {
name string
79 changes: 73 additions & 6 deletions cluster-autoscaler/core/scaledown/unneeded/nodes.go
@@ -17,6 +17,7 @@ limitations under the License.
package unneeded

import (
"fmt"
"reflect"
"time"

@@ -30,6 +31,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/simulator"
"k8s.io/autoscaler/cluster-autoscaler/utils"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
"k8s.io/autoscaler/cluster-autoscaler/utils/taints"

apiv1 "k8s.io/api/core/v1"
klog "k8s.io/klog/v2"
@@ -63,20 +65,85 @@ func NewNodes(sdtg scaleDownTimeGetter, limitsFinder *resource.LimitsFinder) *No
}
}

// LoadFromExistingTaints loads any existing DeletionCandidateTaint taints from the Kubernetes cluster, given a TTL for the taint.
func (n *Nodes) LoadFromExistingTaints(listerRegistry kube_util.ListerRegistry, ts time.Time, DeletionCandidateStalenessTTL time.Duration) error {
allNodes, err := listerRegistry.AllNodeLister().List()
if err != nil {
return fmt.Errorf("failed to list nodes when initializing unneeded nodes: %v", err)
}

var nodesWithTaints []simulator.NodeToBeRemoved
for _, node := range allNodes {
if since, err := taints.GetDeletionCandidateTime(node); err == nil && since != nil {
if since.Add(DeletionCandidateStalenessTTL).Before(ts) {
klog.V(4).Infof("Skipping node %s with deletion candidate taint from %s, since it is older than TTL %s", node.Name, since.String(), DeletionCandidateStalenessTTL.String())
continue
}
nodeToBeRemoved := simulator.NodeToBeRemoved{
Node: node,
}
nodesWithTaints = append(nodesWithTaints, nodeToBeRemoved)
klog.V(4).Infof("Found node %s with deletion candidate taint from %s", node.Name, since.String())
}
}

if len(nodesWithTaints) > 0 {
klog.V(1).Infof("Initializing unneeded nodes with %d nodes that have deletion candidate taints", len(nodesWithTaints))
n.initialize(nodesWithTaints, ts)
}

return nil
}

// initialize initializes the Nodes object with the given node list.
// It sets the initial state of unneeded nodes to reflect the taint status of nodes in the cluster.
// This is done to avoid state loss across deployment restarts.
func (n *Nodes) initialize(nodes []simulator.NodeToBeRemoved, ts time.Time) {
n.updateInternalState(nodes, ts, func(nn simulator.NodeToBeRemoved, ts time.Time) *node {
name := nn.Node.Name
if since, err := taints.GetDeletionCandidateTime(nn.Node); err == nil && since != nil {
klog.V(4).Infof("Found node %s with deletion candidate taint from %s", name, since.String())
return &node{
ntbr: nn,
since: *since,
}
}
klog.V(4).Infof("Found node %s with deletion candidate taint from now", name)
return &node{
ntbr: nn,
since: ts,
}
})
}

// Update stores nodes along with a time at which they were found to be
// unneeded. Previously existing timestamps are preserved.
func (n *Nodes) Update(nodes []simulator.NodeToBeRemoved, ts time.Time) {
n.updateInternalState(nodes, ts, func(nn simulator.NodeToBeRemoved, ts time.Time) *node {
return &node{
ntbr: nn,
since: ts,
}
})
}

func (n *Nodes) updateInternalState(nodes []simulator.NodeToBeRemoved, ts time.Time, updatedNodeBuilder func(simulator.NodeToBeRemoved, time.Time) *node) {
updated := make(map[string]*node, len(nodes))
for _, nn := range nodes {
name := nn.Node.Name
if val, found := n.byName[name]; found {
updated[name] = &node{
ntbr: nn,
since: val.since,
}
} else if updatedNode := updatedNodeBuilder(nn, ts); updatedNode != nil {
updated[name] = updatedNode
}
}
n.byName = updated
n.cachedList = nil
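
To make the TTL check concrete, a small standalone sketch; isStale is a hypothetical helper mirroring the since.Add(ttl).Before(ts) comparison in LoadFromExistingTaints, and the timings match the n1/n2 cases from the planner test.

package main

import (
	"fmt"
	"time"
)

// isStale mirrors the comparison in LoadFromExistingTaints: a DeletionCandidate
// taint older than the TTL is skipped rather than reloaded as unneeded.
func isStale(since, now time.Time, ttl time.Duration) bool {
	return since.Add(ttl).Before(now)
}

func main() {
	now := time.Now()
	ttl := 5 * time.Minute

	fmt.Println(isStale(now.Add(-2*time.Minute), now, ttl))  // false: taint reloaded (like n1)
	fmt.Println(isStale(now.Add(-10*time.Minute), now, ttl)) // true: taint skipped as stale (like n2)
}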