#!/bin/bash
#
# A wrapper script that performs the Waiter-specific setup for a user command in a Waiter-K8s pod.
# The script is usually invoked by prepending it to the user's Waiter command.
# If this script is invoked by dumb-init (github.com/Yelp/dumb-init) or a similar utility,
# please ensure that it is run in single-child mode.
#
# A single argument is expected: the user's command string,
# which is executed in its own bash shell process.
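#
# Example invocation (hypothetical user command, for illustration only):
#   ./waiter-init 'my-server-command --port 8080'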

# This variable will be used to store the user's app's process ID.
# We set it to null here just in case it was set in the external environment.
waiter_child_pid=

# Catch the first SIGTERM sent by Kubernetes on pod deletion,
# waiting for a second signal (SIGTERM or SIGKILL) before exiting.
# This double-termination is an important part of our Waiter scale-down logic,
# and the mechanics are described in more detail below.
handle_k8s_terminate() {
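  # Cleared here; handle_2nd_k8s_terminate sets this flag to true
  # once the second SIGTERM arrives.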
  waiter_2nd_sigterm=false
  trap handle_2nd_k8s_terminate SIGTERM # new handler for next sigterm

  # Propagate the SIGTERM to the user's app's process group,
  # giving it the opportunity to shut down gracefully.
  if [ "$waiter_child_pid" ]; then
    kill -- -$waiter_child_pid
  else
    echo 'waiter error: user process not initialized' >&2
  fi
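
  # Remember this script's PID so the background helper block below can signal it.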
  waiter_init_pid=$$
  {
    # Give the user's application a few seconds to gracefully exit,
    # then forcefully terminate with a SIGKILL if it's still running.
    sleep ${WAITER_GRACE_SECS:=3}
    kill -9 -- -$waiter_child_pid

    # Wait for another signal from Kubernetes.
    # This delay gives Waiter time to safely update the desired replica count
    # before the pod actually terminates, avoiding a race to replace this pod.
    # If we receive a second SIGTERM from Kubernetes, then the sleep period is canceled,
    # and we simply wait for the user's process to complete (or get SIGKILLed).
    # The main point here is to NOT exit before the second SIGTERM is received.
    # If for some reason the second SIGTERM never arrives, the sleep will eventually expire,
    # or the pod's grace period will expire (resulting in a SIGKILL from Kubernetes).
    # Likewise, if the user's process takes too long to terminate gracefully,
    # the pod's grace period will expire (resulting in a SIGKILL from Kubernetes).
    # However, if we don't see the second SIGTERM after a reasonable delay,
    # we assume we missed it (due to the asynchronous nature of the system),
    # and that it is now safe to terminate this pod.
    sleep ${WAITER_SYNC_MAX_SECS:=10}
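    # Simulate the missing second SIGTERM by signaling ourselves,
    # which lets the wait loop below proceed to exit.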
    kill -15 $waiter_init_pid
  } &

  # wait for graceful termination of user's process
  while kill -0 $waiter_child_pid; do
    wait %1
  done &>/dev/null

  # send logs to S3 (if enabled)
  if [ "$WAITER_LOG_BUCKET_URL" ]; then
    # Extract this pod's name from the hostname
    # (this is used to create a unique path in S3)
    pod_name=$(hostname --short)
    base_url="$WAITER_LOG_BUCKET_URL/$pod_name"

    # For each ./r* directory created by a container restart,
    # we upload the stdout and stderr, and build an index.json file
    # that is also uploaded to the target directory in the S3 bucket.
    # We work backwards from the most recent run down to 0 to increase the odds
    # that our most recent runs' logs are successfully persisted before a SIGKILL.
    cd "$waiter_sandbox_base_dir"
    for i in $(seq $waiter_restart_count -1 0); do
      waiter_log_files='stdout stderr'
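      # Assemble a JSON array describing this run's log files in a temporary index file.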
      indextmp=".r$i.index.json"
      rm -f $indextmp
      separator='['
      for f in $waiter_log_files; do
        logfile="r$i/$f"
        # Using the -T option with curl PUTs the target file to the given URL,
        # and avoids loading the full file into memory when sending the payload.
        curl -s -T "$logfile" "$base_url/$logfile"
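        # Append a JSON object recording the log file's name and size to the index.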
        printf '%s\n{"name":"%s","type":"file","size":%d}' "$separator" "$f" "$(stat -c%s $logfile)" >> $indextmp
        separator=','
      done
      printf '\n]\n' >> $indextmp
      curl -s -T "$indextmp" "$base_url/r$i/index.json"
    done
  fi

  # wait for second sigterm to arrive
  while [ $waiter_2nd_sigterm != true ]; do
    sleep 0.1
  done

  # Exit container with code 128+15=143, indicating termination via SIGTERM.
  exit 143
}

# Catch the second SIGTERM sent by Kubernetes on pod deletion.
# This double-termination is an important part of our Waiter scale-down logic,
# and the mechanics are described in more detail above (in handle_k8s_terminate).
handle_2nd_k8s_terminate() {
  trap : SIGTERM # reset SIGTERM handler to no-op
  waiter_2nd_sigterm=true
}

trap handle_k8s_terminate SIGTERM

# Track container restart count
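# The previous run count is read from .waiter-container-runs;
# the arithmetic expansion defaults to 0 on the first run, when that file does not exist.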
waiter_restart_count=$(( $([ -f .waiter-container-runs ] && cat .waiter-container-runs) ))
echo $(( $waiter_restart_count + 1 )) > .waiter-container-runs

# Ensure that HOME is set to the fresh working directory for this container instance.
# HOME should be a symlink ./latest, which points to the new working directory.
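# Remember the physical base directory; the log-upload logic in handle_k8s_terminate cds back here.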
waiter_sandbox_base_dir="$(pwd -P)"
waiter_sandbox_dir="./r${waiter_restart_count}"
mkdir -p "$waiter_sandbox_dir"
ln -Tsf $waiter_sandbox_dir latest
cd "$HOME"

# Copy stdout and stderr to correspondingly named files to mimic Mesos containers.
# We tee the output so that stdout and stderr are still accessible
# via the Kubernetes `kubectl logs <pod-name>` command.
exec 2> >(tee stderr 1>&2)
exec 1> >(tee stdout)

# Run the user's Waiter app command in its own process group.
/usr/bin/setsid /bin/bash -c "$1" &
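# setsid makes the child the leader of a new process group,
# so `kill -- -$waiter_child_pid` in handle_k8s_terminate signals the user's entire process tree.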
waiter_child_pid=$!

# Wait for the user's process to exit, propagating the exit code.
# If this wait call is interrupted by a SIGTERM,
# then the control flow switches to the handle_k8s_terminate routine.
wait %1