-
Notifications
You must be signed in to change notification settings - Fork 1
/
rotate_cluster.sh
executable file
·155 lines (113 loc) · 4.85 KB
/
rotate_cluster.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/bin/bash
# Stop at the first failing command, and count a pipeline as failed as soon as
# any one of its stages fails.
set -e
set -o pipefail

# DIR ends up holding the absolute path of the directory this script lives in:
# first the (possibly relative) directory part of the script path is taken,
# then it is resolved by changing into it in a subshell and printing the
# working directory. Anything `cd` itself prints is discarded.
DIR=$(dirname "${BASH_SOURCE[0]}")
DIR=$(cd "${DIR}" >/dev/null && pwd)
# Write the help text (the environment variables this script requires) to
# stderr, then terminate the whole script with exit status 1.
# This function never returns to its caller.
function usage() {
  printf '%s\n' \
    "Usage: ./rotate_cluster.sh" \
    "Set the following environment variables to run this script:" \
    "KUBECONFIG The path to the kubeconfig file of the cluster." \
    "MASTER_IPS The list of public IPs of the master nodes, separated by space" \
    "WORKER_IPS The list of private IPs of the worker nodes, separated by space" \
    "SSH_KEY The path to the ssh private key that allows to login the master nodes" >&2
  exit 1
}
# Poll until the pods labelled k8s-app=<name> in the kube-system namespace have
# settled: at least one pod is listed as Running and none as Terminating.
#
# Globals:   KUBECTL (read) - path of the kubectl binary to run
# Arguments: $1 - value of the k8s-app label to watch
# Outputs:   one "running pods: N, terminating pods: M" line per poll, then
#            "<name> restarted" once the condition holds
# Note:      there is no timeout; the function polls every 5 seconds until the
#            condition is met.
function wait_pods() {
  local app="${1}"
  local pod_list
  local running_pods=0
  local terminating_pods=0

  sleep 5
  # Arithmetic comparison: inside [[ ]] the '>' operator compares strings, which
  # gives wrong answers for counts that carry padding or are compared as text.
  until (( running_pods > 0 && terminating_pods == 0 )); do
    sleep 5
    # Take ONE snapshot per poll so both counters describe the same moment.
    # A failing query (e.g. the API server is still restarting) is deliberately
    # tolerated and simply counts as "no pods seen yet".
    pod_list=$("${KUBECTL}" get pods -l "k8s-app=${app}" -n kube-system 2>/dev/null || true)
    # grep -c prints 0 but exits non-zero when nothing matches; '|| true' keeps
    # that from aborting the script under 'set -e'.
    running_pods=$(grep -c Running <<< "${pod_list}" || true)
    terminating_pods=$(grep -c Terminating <<< "${pod_list}" || true)
    echo "running pods: $running_pods, terminating pods: $terminating_pods"
  done
  echo "${app} restarted"
}
# Delete every pod labelled k8s-app=<name> in the kube-system namespace and
# then wait (via wait_pods) for that label's pods to be running again.
#
# Globals:   KUBECTL (read) - path of the kubectl binary to run
# Arguments: $1 - value of the k8s-app label whose pods are restarted
# Outputs:   "restart <name>" plus whatever kubectl and wait_pods print
function restart_pods() {
  local app="${1}"

  echo "restart ${app}"
  # Best effort on purpose: a failing delete must not abort the script under
  # 'set -e'; wait_pods decides afterwards whether the pods are healthy.
  # Quoted so that neither the kubectl path nor the label is word-split.
  "${KUBECTL}" delete pod -l "k8s-app=${app}" -n kube-system || true
  wait_pods "${app}"
}
# Restart the kubelet service over ssh, first on every master and then on
# every worker. Masters are contacted directly; workers are reached by
# ssh-ing to the first master and running a second ssh from there.
#
# Globals:   MASTER_IPS (read) - master addresses, whitespace separated
#            WORKER_IPS (read) - worker addresses, whitespace separated
#            SSH_KEY    (read) - path of the private key used for the login
# Outputs:   a "restart kubelet on ..." / "... restarted" pair per node
# Note:      each restart is followed by a 10 second pause before the next
#            node is touched.
function restart_kubelet() {
  local addr
  local jump_host
  local -a masters
  # One shared option list; the key path is quoted so a path containing
  # spaces is still passed to ssh as a single argument.
  local -a ssh_opts=(-A -o StrictHostKeyChecking=no -i "${SSH_KEY}")

  # Word splitting of the address lists is intentional: they are documented
  # as space-separated strings.
  # shellcheck disable=SC2086
  for addr in ${MASTER_IPS}; do
    echo "restart kubelet on master $addr"
    ssh "${ssh_opts[@]}" "core@${addr}" "sudo systemctl restart kubelet"
    echo "kubelet on master $addr restarted"
    sleep 10
  done

  # The first master doubles as the jump host for the workers.
  # shellcheck disable=SC2206
  masters=(${MASTER_IPS})
  jump_host=${masters[0]}

  # shellcheck disable=SC2086
  for addr in ${WORKER_IPS}; do
    echo "restart kubelet on worker $addr"
    ssh "${ssh_opts[@]}" "core@${jump_host}" "ssh -o StrictHostKeyChecking=no core@$addr sudo systemctl restart kubelet"
    echo "kubelet on worker $addr restarted"
    sleep 10
  done
}
# The kubectl binary is looked up in DIR rather than on PATH.
KUBECTL="${DIR}/kubectl"

# Each of these variables must be set and non-empty. On the first one that is
# missing, usage is called (which prints the help text and exits).
for required_var in KUBECONFIG MASTER_IPS WORKER_IPS SSH_KEY; do
  [[ -n "${!required_var}" ]] || usage
done
unset required_var

# Only now that the required inputs are known to exist, make any later
# reference to an unset variable a hard error.
set -u
# Apply one generated patch file. The same file is handed to kubectl twice:
# with -f to identify the resource and with -p as the patch content.
# Globals:   KUBECTL (read)
# Arguments: $1 - patch path relative to ./generated/patches
function apply_patch() {
  local patch_file="./generated/patches/${1}"
  local patch_body
  # Read in a separate assignment so an unreadable patch file stops the run
  # (set -e) instead of being silently turned into an empty -p argument.
  patch_body=$(cat "${patch_file}")
  "${KUBECTL}" patch -f "${patch_file}" -p "${patch_body}"
}

# Ask the operator to replace the kubeconfig on the nodes and block until a
# single 'y' is typed on stdin.
# Arguments: $1 - first line of the prompt
function confirm_kubeconfig_replaced() {
  local REPLY=0
  echo
  echo "${1}"
  echo "If you are on AWS, you can run ./aws/update_kubeconfig"
  echo "If you are on other platform, please contact support for instructions"
  echo "Press 'y' when finished"
  echo
  until [[ ${REPLY} == y ]]; do
    # Reads exactly one character into REPLY without echoing a prompt.
    read -p "" -n 1 -r
  done
}

# --- Step 1 -----------------------------------------------------------------
echo "update CA"
apply_patch step_1/kube-apiserver-secret.patch
apply_patch step_1/kube-controller-manager-secret.patch
apply_patch step_1/tectonic-ca-cert-secret.patch
apply_patch step_1/ingress-tls.patch
apply_patch step_1/identity-grpc-client.patch
apply_patch step_1/identity-grpc-server.patch
sleep 10
restart_pods kube-apiserver

confirm_kubeconfig_replaced "Please replace the kubeconfig on each node before proceeding"
restart_kubelet

# --- Step 2 -----------------------------------------------------------------
echo "update api server cert."
apply_patch step_2/kube-apiserver-secret.patch
sleep 10

echo "restart API server"
# Best effort: a failing delete must not abort the rotation half-way.
"${KUBECTL}" delete pod -l k8s-app=kube-apiserver -n kube-system || true
# Use the new kubeconfig for listing pods because we just rotated the API server certs above.
export KUBECONFIG=./generated/auth/kubeconfig
wait_pods kube-apiserver

echo "restart kube-controller-manager and pod checkpointer"
restart_pods kube-controller-manager
restart_pods pod-checkpointer

# --- Step 3 -----------------------------------------------------------------
echo "delete old CA block."
apply_patch step_3/kube-apiserver-secret.patch
apply_patch step_3/kube-controller-manager-secret.patch
sleep 10
restart_pods kube-apiserver
# From here on the generated final kubeconfig replaces the one used so far.
cp ./generated/patches/step_3/final_kubeconfig ./generated/auth/kubeconfig

confirm_kubeconfig_replaced "Please replace the kubeconfig again on each node before proceeding"
restart_kubelet

echo
echo "Cluster CA and certs are successfully rotated"
echo "Please reboot all nodes one by one to ensure all pods update their service account"
echo "This can be done by running ./utils/reboot_helper.sh"