Commit 8c5f6d2

Authored Apr 22, 2020

Adds Node Ignore from Deletion annotation (#187)

* Added annotation skip delete, test skeleton
* Added unit test for scale down
* Changed annotation, addressed comments, updated docs
* Addressed docs formatting comments
* Fixed grammar

1 parent 8d2353e · commit 8c5f6d2

File tree: 5 files changed, +201 −2 lines

‎docs/README.md

+3

@@ -19,8 +19,11 @@
   - Scale up
   - Scale down
   - Scale lock
+  - Tainting of nodes
+  - Cordoning of nodes
 - [**Node Termination**](./node-termination.md)
   - Node selection method for termination
+  - Annotating nodes / Stopping termination of selected nodes
 - [**Pod and Node Selectors**](./pod-node-selectors.md)
   - Nodes
   - Pods

‎docs/node-termination.md

+37 −1

@@ -15,4 +15,40 @@ where we compare the creation timestamps of each node.
 
 This method is useful to ensure there are always new nodes in the cluster. If you want to deploy a configuration change
 to your nodes, you can use Escalator to cycle the nodes by terminating the oldest first until all of the nodes are
-using the latest configuration.
+using the latest configuration.
+
+### Annotating nodes / Stopping termination of selected nodes
+
+In most cases, when you want to exclude a node from termination for maintenance, you should first consider the [**cordon function**](./scale-process.md).
+Cordoning a node filters it out of calculation **and** scaling processes (tainting and deletion). Additionally, cordoning a node in Kubernetes marks it unschedulable, so it won't accept workloads while in this state.
+
+In cases where cordoning is not acceptable, because you want the node to continue receiving workloads but not be deleted, you can annotate the node to tell Escalator to treat it as normal except for deletion.
+This means the node will still factor into scaling calculations and will be tainted and untainted as the cluster scales, but in any situation where it would normally be deleted, Escalator will skip it and leave it active.
+
+#### To annotate a node:
+
+**Annotation key:** `atlassian.com/no-delete`
+
+The annotation value can be any **non-empty** string that conforms to the Kubernetes annotation value standards. It can be used to indicate the reason for the annotation, which will appear in the logs.
+
+**Example:**
+
+`kubectl edit node <node-name>`
+
+**Simple example:**
+```yaml
+apiVersion: v1
+kind: Node
+metadata:
+  annotations:
+    atlassian.com/no-delete: "true"
+```
+
+**An elaborate reason:**
+```yaml
+apiVersion: v1
+kind: Node
+metadata:
+  annotations:
+    atlassian.com/no-delete: "testing some long running bpf tracing"
+```
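As a practical aside (not part of this commit's docs), the same annotation can be added or removed without opening an editor, using the standard `kubectl annotate` command; the node name and reason below are placeholders:

```bash
# add the annotation; any non-empty value works and will show up in Escalator's logs
kubectl annotate node <node-name> atlassian.com/no-delete="testing some long running bpf tracing"

# remove the annotation (trailing dash) so the node becomes deletable again
kubectl annotate node <node-name> atlassian.com/no-delete-
```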

‎docs/scale-process.md

+1 −1

@@ -77,5 +77,5 @@ Escalator taints are given the `atlassian.com/escalator` key.
 Escalator does not use the cordoning command anywhere in its process. This is done to preserve the cordoning command
 for system administrators to filter the cordoned node out of calculations. This way, a faulty or misbehaving node
 can be cordoned by the system administrator to be debugged or troubleshot without worrying about the node being
-tainted and then terminated by Escalator.
+tainted and then terminated by Escalator.
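For reference, cordoning is performed directly by the administrator with the standard kubectl commands (not via Escalator), for example:

```bash
# mark the node unschedulable; Escalator filters cordoned nodes out of its calculations
kubectl cordon <node-name>

# return the node to normal scheduling and Escalator management
kubectl uncordon <node-name>
```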

‎pkg/controller/scale_down.go

+25

@@ -12,6 +12,13 @@ import (
 	v1 "k8s.io/api/core/v1"
 )
 
+const (
+	// NodeEscalatorIgnoreAnnotation is the key of an annotation on a node that signifies it should be ignored from ASG deletion
+	// the value does not matter and can be used to record a reason, as long as it is not empty
+	// if set, the node will not be deleted; however, it can still be tainted and factored into calculations
+	NodeEscalatorIgnoreAnnotation = "atlassian.com/no-delete"
+)
+
 // ScaleDown performs the taint and remove node logic
 func (c *Controller) ScaleDown(opts scaleOpts) (int, error) {
 	removed, err := c.TryRemoveTaintedNodes(opts)
@@ -29,12 +36,30 @@ func (c *Controller) ScaleDown(opts scaleOpts) (int, error) {
 	return c.scaleDownTaint(opts)
 }
 
+func safeFromDeletion(node *v1.Node) (string, bool) {
+	for key, val := range node.ObjectMeta.Annotations {
+		if key == NodeEscalatorIgnoreAnnotation && val != "" {
+			return val, true
+		}
+	}
+	return "", false
+}
+
 // TryRemoveTaintedNodes attempts to remove nodes that are
 // * tainted and empty
 // * have passed their grace period
 func (c *Controller) TryRemoveTaintedNodes(opts scaleOpts) (int, error) {
 	var toBeDeleted []*v1.Node
 	for _, candidate := range opts.taintedNodes {
+
+		// skip any nodes marked with the NodeEscalatorIgnoreAnnotation whose value is non-empty
+		// filter these nodes out as late as possible so the rest of Escalator's scaling calculations remain unaffected,
+		// because the nodes still exist and use resources; this node is safe from deletion, not from tainting
+		if why, ok := safeFromDeletion(candidate); ok {
+			log.Infof("node %s has escalator ignore annotation %s: Reason: %s. Removing from deletion options", candidate.Name, NodeEscalatorIgnoreAnnotation, why)
+			continue
+		}
+
 		// if the time the node was tainted is longer than the hard period then it is deleted no matter what
 		// if the soft time has passed and the node is empty (excluding daemonsets) then it can be deleted
 		taintedTime, err := k8s.GetToBeRemovedTime(candidate)

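Since Kubernetes annotations are a plain `map[string]string`, the loop in `safeFromDeletion` is equivalent to a direct map lookup. A minimal standalone sketch of the same check (the `main` function and node fixture here are illustrative only, not code from the commit):

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// nodeEscalatorIgnoreAnnotation mirrors the constant added in this commit.
const nodeEscalatorIgnoreAnnotation = "atlassian.com/no-delete"

// safeFromDeletion is the direct-lookup equivalent of the helper above.
// Indexing a nil map is safe in Go, so nodes without annotations return ("", false).
func safeFromDeletion(node *v1.Node) (string, bool) {
	val := node.ObjectMeta.Annotations[nodeEscalatorIgnoreAnnotation]
	return val, val != ""
}

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{
		Name:        "node-1",
		Annotations: map[string]string{nodeEscalatorIgnoreAnnotation: "maintenance window"},
	}}
	if why, ok := safeFromDeletion(node); ok {
		fmt.Printf("node %s is safe from deletion: %s\n", node.Name, why)
	}
}
```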
‎pkg/controller/scale_down_test.go

+135

@@ -365,3 +365,138 @@ func TestControllerTaintOldestN(t *testing.T) {
 func TestControllerScaleDown(t *testing.T) {
 	t.Skip("test not implemented")
 }
+
+func TestController_TryRemoveTaintedNodes(t *testing.T) {
+
+	minNodes := 10
+	maxNodes := 20
+	nodeGroup := NodeGroupOptions{
+		Name:                    "default",
+		CloudProviderGroupName:  "default",
+		MinNodes:                minNodes,
+		MaxNodes:                maxNodes,
+		ScaleUpThresholdPercent: 100,
+	}
+	nodeGroups := []NodeGroupOptions{nodeGroup}
+
+	nodes := test.BuildTestNodes(10, test.NodeOpts{
+		CPU:     1000,
+		Mem:     1000,
+		Tainted: true,
+	})
+
+	pods := buildTestPods(10, 1000, 1000)
+	client, opts := buildTestClient(nodes, pods, nodeGroups, ListerOptions{})
+
+	// For these test cases we only use 1 node group/cloud provider node group
+	nodeGroupSize := 1
+
+	// Create a test (mock) cloud provider
+	testCloudProvider := test.NewCloudProvider(nodeGroupSize)
+	testNodeGroup := test.NewNodeGroup(
+		nodeGroup.CloudProviderGroupName,
+		nodeGroup.Name,
+		int64(minNodes),
+		int64(maxNodes),
+		int64(len(nodes)),
+	)
+
+	testCloudProvider.RegisterNodeGroup(testNodeGroup)
+
+	// Create a node group state with the mapping of node groups to the cloud provider's node groups
+	nodeGroupsState := BuildNodeGroupsState(nodeGroupsStateOpts{
+		nodeGroups: nodeGroups,
+		client:     *client,
+	})
+
+	nodeGroupsState[testNodeGroup.ID()].NodeInfoMap = k8s.CreateNodeNameToInfoMap(pods, nodes)
+
+	c := &Controller{
+		Client:        client,
+		Opts:          opts,
+		stopChan:      nil,
+		nodeGroups:    nodeGroupsState,
+		cloudProvider: testCloudProvider,
+	}
+
+	// taint the oldest N nodes according to the controller
+	taintedIndex := c.taintOldestN(nodes, nodeGroupsState[testNodeGroup.ID()], 2)
+	assert.Equal(t, len(taintedIndex), 2)
+
+	// add the remaining nodes to the untainted list
+	taintedNodes := []*v1.Node{nodes[taintedIndex[0]], nodes[taintedIndex[1]]}
+	var untaintedNodes []*v1.Node
+	for i, n := range nodes {
+		if n == taintedNodes[0] || n == taintedNodes[1] {
+			continue
+		}
+
+		untaintedNodes = append(untaintedNodes, nodes[i])
+	}
+	assert.Equal(t, len(nodes)-2, len(untaintedNodes))
+
+	tests := []struct {
+		name                 string
+		opts                 scaleOpts
+		annotateFirstTainted bool
+		want                 int
+		wantErr              bool
+	}{
+		{
+			"test normal delete all tainted",
+			scaleOpts{
+				nodes,
+				taintedNodes,
+				untaintedNodes,
+				nodeGroupsState[testNodeGroup.ID()],
+				0, // not used in TryRemoveTaintedNodes
+			},
+			false,
+			-2,
+			false,
+		},
+		{
+			"test normal skip first tainted",
+			scaleOpts{
+				nodes,
+				taintedNodes,
+				untaintedNodes,
+				nodeGroupsState[testNodeGroup.ID()],
+				0, // not used in TryRemoveTaintedNodes
+			},
+			true,
+			-1,
+			false,
+		},
+		{
+			"test none tainted",
+			scaleOpts{
+				nodes,
+				[]*v1.Node{},
+				nodes,
+				nodeGroupsState[testNodeGroup.ID()],
+				0, // not used in TryRemoveTaintedNodes
+			},
+			false,
+			0,
+			false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if tt.annotateFirstTainted {
+				tt.opts.taintedNodes[0].Annotations = map[string]string{
+					NodeEscalatorIgnoreAnnotation: "skip for testing",
+				}
+			}
+			got, err := c.TryRemoveTaintedNodes(tt.opts)
+			if tt.wantErr {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+			}
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
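To run only the new test from a checkout of the repository, the standard `go test` invocation applies:

```bash
go test ./pkg/controller/ -run TestController_TryRemoveTaintedNodes -v
```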
