From 5c294f475e85081e85ecc82462f20129c3a55ee0 Mon Sep 17 00:00:00 2001 From: Nick Vrvilo Date: Thu, 10 Jan 2019 17:43:35 -0600 Subject: [PATCH] S3 backup for Waiter Service logs on K8s --- kitchen/.gitignore | 5 + kitchen/bin/test-waiter-init | 31 +++++++ kitchen/bin/waiter-init | 91 +++++++++++++++--- .../fileserver-container/fileserver-start | 4 +- .../ci/run-integration-tests-k8s-scheduler.sh | 11 ++- waiter/bin/ci/s3-server-setup.sh | 26 ++++++ waiter/config-full.edn | 8 ++ waiter/config-k8s.edn | 2 + .../kubernetes_scheduler_integration_test.clj | 93 ++++++++++++++++++- waiter/src/waiter/scheduler/kubernetes.clj | 78 ++++++++++++---- waiter/src/waiter/settings.clj | 5 +- waiter/src/waiter/util/client_tools.clj | 5 + .../test/waiter/scheduler/kubernetes_test.clj | 10 +- 13 files changed, 327 insertions(+), 42 deletions(-) create mode 100755 waiter/bin/ci/s3-server-setup.sh diff --git a/kitchen/.gitignore b/kitchen/.gitignore index 3b9992af2..3709e3533 100644 --- a/kitchen/.gitignore +++ b/kitchen/.gitignore @@ -1,3 +1,8 @@ .cache __pycache__ kitchen.log + +/latest +/r[0-9] +/.r[0-9].index.json +/.waiter-container-runs diff --git a/kitchen/bin/test-waiter-init b/kitchen/bin/test-waiter-init index 0573170c3..bfd653253 100755 --- a/kitchen/bin/test-waiter-init +++ b/kitchen/bin/test-waiter-init @@ -13,6 +13,18 @@ start_test() { working_dir=r$(( $(cat .waiter-container-runs) - 1 )) } +start_guarded_test() { + test_desc="$1" + shift + if test "$@"; then + start_test "$test_desc" + test_started=true + else + printf '\nSkipping Test %s' "$test_desc" + test_started=false + fi +} + send_sigterm() { sleep 0.1 kill $test_pid @@ -136,4 +148,23 @@ assert $working_dir != $old_working_dir assert -f $working_dir/$fresh_file_name assert $(cat $working_dir/$fresh_file_name) == hello +# +# Ensure that the stdout file captured by the wrapper script +# is uploaded to S3 when the script is terminated. +# This test runs iff WAITER_LOG_BUCKET_URL is non-null. +# +start_guarded_test 'stdout uploaded to s3 bucket' "$WAITER_LOG_BUCKET_URL" <&2 fi - # Wait for a long time (15 minutes), awaiting another signal from Kubernetes. - # This delay gives Waiter time to safely update the desired replica count - # before the pod actually terminates, avoiding a race to replace this pod. - # If we receive a second SIGTERM from Kubernetes, then the sleep period is canceled, - # and we simply wait for the user's process to complete (or get SIGKILLed). - # The main point here is to NOT exit before the second SIGTERM is received. - # If for some reason the second SIGTERM never arrives, the sleep will eventually expire, - # or the pod's grace period will expire (resulting in a SIGKILL from Kubernetes). - # Likewise, if the user's process takes too long to terminate gracefully, - # the pod's grace period will expire (resulting in a SIGKILL from Kubernetes). - sleep 900 & - wait # wait for sleep to complete, or a second SIGTERM - kill %2 # cancel sleep - wait # wait for graceful termination of user's process + waiter_init_pid=$$ + + { + # Give the user's application a few seconds to gracefully exit, + # then forcefully terminate with a SIGKILL if it's still running. + sleep ${WAITER_GRACE_SECS:=3} + kill -9 -- -$waiter_child_pid + + # Wait for another signal from Kubernetes. + # This delay gives Waiter time to safely update the desired replica count + # before the pod actually terminates, avoiding a race to replace this pod. 
+    # If we receive a second SIGTERM from Kubernetes, then the sleep period is canceled,
+    # and we simply wait for the user's process to complete (or get SIGKILLed).
+    # The main point here is to NOT exit before the second SIGTERM is received.
+    # If for some reason the second SIGTERM never arrives, the sleep will eventually expire,
+    # or the pod's grace period will expire (resulting in a SIGKILL from Kubernetes).
+    # Likewise, if the user's process takes too long to terminate gracefully,
+    # the pod's grace period will expire (resulting in a SIGKILL from Kubernetes).
+    # However, if we don't see the second SIGTERM after a reasonable delay,
+    # we assume we missed it (due to the asynchronous nature of the system),
+    # and that it is now safe to terminate this pod.
+    sleep ${WAITER_SYNC_MAX_SECS:=10}
+    kill -15 $waiter_init_pid
+  } &
+
+  # Wait for graceful termination of the user's process.
+  while kill -0 $waiter_child_pid; do
+    wait %1
+  done &>/dev/null
+
+  # Send logs to S3 (if enabled).
+  if [ "$WAITER_LOG_BUCKET_URL" ]; then
+    # Extract this pod's name from the hostname
+    # (this is used to create a unique path in S3).
+    pod_name=$(hostname --short)
+    base_url="$WAITER_LOG_BUCKET_URL/$pod_name"
+    # For each ./r* directory created by a container restart,
+    # we upload the stdout and stderr, and build an index.json file
+    # that is also uploaded to the target directory in the S3 bucket
+    # (e.g., ./r1/stdout is uploaded to $base_url/r1/stdout).
+    # We work backwards from the most recent run down to 0 to increase the odds
+    # that our most recent runs' logs are successfully persisted before a SIGKILL.
+    cd "$waiter_sandbox_base_dir"
+    for i in $(seq $waiter_restart_count -1 0); do
+      waiter_log_files='stdout stderr'
+      indextmp=".r$i.index.json"
+      rm -f $indextmp
+      separator='['
+      for f in $waiter_log_files; do
+        logfile="r$i/$f"
+        # Using the -T option with curl PUTs the target file to the given URL,
+        # and avoids loading the full file into memory when sending the payload.
+        curl -s -T "$logfile" "$base_url/$logfile"
+        printf '%s\n{"name":"%s","type":"file","size":%d}' "$separator" "$f" "$(stat -c%s "$logfile")" >> $indextmp
+        separator=','
+      done
+      printf '\n]\n' >> $indextmp
+      curl -s -T "$indextmp" "$base_url/r$i/index.json"
+    done
+  fi
+
+  # Wait for the second SIGTERM to arrive.
+  while [ "$waiter_2nd_sigterm" != true ]; do
+    sleep 0.1
+  done
 
   # Exit container with code 128+15=143, indicating termination via SIGTERM.
   exit 143
 }
+
+# Catch the second SIGTERM sent by Kubernetes on pod deletion.
+# This double-termination is an important part of our Waiter scale-down logic,
+# and the mechanics are described in more detail above (in handle_k8s_terminate).
+handle_2nd_k8s_terminate() {
+  trap : SIGTERM  # reset the SIGTERM handler to a no-op
+  waiter_2nd_sigterm=true
+}
+
 trap handle_k8s_terminate SIGTERM
 
 # Track container restart count
@@ -53,6 +113,7 @@
 echo $(( $waiter_restart_count + 1 )) > .waiter-container-runs
 
 # Ensure that HOME is set to the fresh working directory for this container instance.
 # HOME should be a symlink ./latest, which points to the new working directory.
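+# For illustration (hypothetical run numbers): after two container restarts,
+# the sandbox base directory would contain
+#     .waiter-container-runs    (run counter, holding "3")
+#     r0/  r1/  r2/             (one working directory per container run)
+#     latest -> r2              (the symlink that HOME points into)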
+waiter_sandbox_base_dir="$(pwd -P)" waiter_sandbox_dir="./r${waiter_restart_count}" mkdir -p "$waiter_sandbox_dir" ln -Tsf $waiter_sandbox_dir latest diff --git a/kubernetes/fileserver-container/fileserver-start b/kubernetes/fileserver-container/fileserver-start index 6c8be8eef..09e0b237c 100755 --- a/kubernetes/fileserver-container/fileserver-start +++ b/kubernetes/fileserver-container/fileserver-start @@ -7,5 +7,5 @@ export WAITER_FILESERVER_PORT # Generate server config from template envsubst /root/nginx.conf -# Start server in non-daemon mode -nginx -c /root/nginx.conf +# Start server in non-daemon mode, replacing current process +exec nginx -c /root/nginx.conf diff --git a/waiter/bin/ci/run-integration-tests-k8s-scheduler.sh b/waiter/bin/ci/run-integration-tests-k8s-scheduler.sh index 035883a9a..755973cca 100755 --- a/waiter/bin/ci/run-integration-tests-k8s-scheduler.sh +++ b/waiter/bin/ci/run-integration-tests-k8s-scheduler.sh @@ -2,8 +2,8 @@ # Usage: run-integration-tests-k8s-scheduler.sh [TEST_COMMAND] [TEST_SELECTOR] # # Examples: -# run-integration-tests-k8s-scheduler.sh parallel-test integration-fast -# run-integration-tests-k8s-scheduler.sh parallel-test integration-slow +# run-integration-tests-k8s-scheduler.sh parallel-test integration-heavy +# run-integration-tests-k8s-scheduler.sh parallel-test integration-lite # run-integration-tests-k8s-scheduler.sh parallel-test # run-integration-tests-k8s-scheduler.sh # @@ -21,6 +21,13 @@ KITCHEN_DIR=${WAITER_DIR}/../kitchen # Start minikube ${DIR}/minikube-setup.sh +# Start S3 test server +if [[ $TEST_SELECTOR =~ heavy$ ]]; then + ${DIR}/s3-server-setup.sh + S3SERVER_IP=$(docker inspect s3server | jq -r '.[0].NetworkSettings.Networks.bridge.IPAddress') + export WAITER_S3_BUCKET=http://$S3SERVER_IP:8000/waiter-service-logs +fi + # Ensure we have the docker image for the pods ${KITCHEN_DIR}/bin/build-docker-image.sh diff --git a/waiter/bin/ci/s3-server-setup.sh b/waiter/bin/ci/s3-server-setup.sh new file mode 100755 index 000000000..f68dbe550 --- /dev/null +++ b/waiter/bin/ci/s3-server-setup.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -e + +# Install AWS CLI (for s3api commands) via pip +# We use this to handle S3 authentication for bucket creation +# https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-install.html +pip install awscli --upgrade --user + +# Run cloudserver (S3 compatible test server) via docker +# The API server endpoint is accessible via localhost:8888 +# https://hub.docker.com/r/scality/s3server +echo Starting S3 server docker container +docker run --name s3server --detach --rm --publish=8888:8000 scality/s3server:6018536a +echo -n Waiting for S3 server +while ! curl localhost:8888 &>/dev/null; do + echo -n . 
+ sleep 1 +done +echo + +# Create a public RW bucket for waiter app logs, using the default cloudserver credentials +# https://github.com/scality/cloudserver#run-it-with-a-file-backend +AWS_ACCESS_KEY_ID=accessKey1 AWS_SECRET_ACCESS_KEY=verySecretKey1 \ + aws s3api create-bucket --endpoint-url=http://localhost:8888 \ + --acl=public-read-write --bucket=waiter-service-logs --output=table diff --git a/waiter/config-full.edn b/waiter/config-full.edn index ba363f793..340a3521d 100644 --- a/waiter/config-full.edn +++ b/waiter/config-full.edn @@ -349,6 +349,14 @@ :authorizer {:kind :default :default {:factory-fn waiter.authorization/noop-authorizer}} + ;; The :log-bucket-url setting is optional, but if it's non-nil, it should be the URL (string) of an S3 bucket + ;; where log files should be copied when a waiter service pod is terminated. + ;; When a :log-bucket-url is given, the pod waits up to an additional :log-bucket-sync-secs before terminating + ;; (starting after :pod-sigkill-delay-secs or the user process's graceful exit). + ;; The log-bucket-sync-secs option should be set between 0 and 300 seconds, inclusive (default is 180). + :log-bucket-sync-secs 180 + :log-bucket-url "http://s3.example.com/waiter-service-logs" + ;; String value used to annotate Kubernetes objects that are orchestrated by this Waiter instantiation: :cluster-name "waiter" diff --git a/waiter/config-k8s.edn b/waiter/config-k8s.edn index 569ec8a09..68fb7cb84 100644 --- a/waiter/config-k8s.edn +++ b/waiter/config-k8s.edn @@ -24,6 +24,8 @@ :kubernetes {:authorizer {:kind :sanity-check :sanity-check {:factory-fn waiter.authorization/sanity-check-authorizer}} :fileserver {:port 591} + :log-bucket-sync-secs 10 + :log-bucket-url #config/env-default ["WAITER_S3_BUCKET" nil] :url "http://localhost:8001"}} ; ---------- Error Handling ---------- diff --git a/waiter/integration/waiter/kubernetes_scheduler_integration_test.clj b/waiter/integration/waiter/kubernetes_scheduler_integration_test.clj index 24dde128d..953ebf9c2 100644 --- a/waiter/integration/waiter/kubernetes_scheduler_integration_test.clj +++ b/waiter/integration/waiter/kubernetes_scheduler_integration_test.clj @@ -1,5 +1,10 @@ (ns waiter.kubernetes-scheduler-integration-test - (:require [clojure.test :refer :all] + (:require [clojure.data.json :as json] + [clojure.set :as set] + [clojure.string :as string] + [clojure.walk :as walk] + [clojure.test :refer :all] + [clojure.tools.logging :as log] [waiter.util.client-tools :refer :all])) (deftest ^:parallel ^:integration-fast test-kubernetes-watch-state-update @@ -34,3 +39,89 @@ (< initial-rs-snapshot-version initial-rs-watch-version))) (is (<= initial-rs-snapshot-version rs-snapshot-version')) (is (< rs-snapshot-version' rs-watch-version')))))))) + +(deftest ^:parallel ^:integration-slow ^:resource-heavy test-s3-logs + (testing-using-waiter-url + (when (using-k8s? waiter-url) + (let [headers {:x-waiter-name (rand-name) + :x-waiter-max-instances 2 + :x-waiter-scale-up-factor 0.99 + :x-waiter-scale-down-factor 0.99 + :x-kitchen-delay-ms 5000} + _ (log/info "making canary request...") + {:keys [cookies instance-id service-id]} (make-request-with-debug-info headers #(make-kitchen-request waiter-url %)) + request-fn (fn [] (->> #(make-kitchen-request waiter-url %) + (make-request-with-debug-info headers) + :instance-id))] + (with-service-cleanup + service-id + (assert-service-on-all-routers waiter-url service-id cookies) + ;; Get a service with at least one active and one killed instance. 
;; This portion of the test logic was copied from basic-test/test-killed-instances
+          (log/info "starting parallel requests")
+          (let [instance-ids-atom (atom #{})
+                instance-request-fn (fn []
+                                      (let [instance-id (request-fn)]
+                                        (swap! instance-ids-atom conj instance-id)))
+                instance-ids (->> (parallelize-requests 4 10 instance-request-fn
+                                                        :canceled? (fn [] (> (count @instance-ids-atom) 2))
+                                                        :verbose true
+                                                        :service-id service-id)
+                                  (reduce set/union))]
+            (is (> (count instance-ids) 1) (str instance-ids)))
+
+          (log/info "waiting for at least one instance to get killed")
+          (is (wait-for #(->> (get-in (service-settings waiter-url service-id) [:instances :killed-instances])
+                              (map :id)
+                              set
+                              seq)
+                        :interval 2 :timeout 45)
+              (str "No killed instances found for " service-id))
+          ;; Test that the active instances' logs are available.
+          ;; This portion of the test logic was copied from basic-test/test-basic-logs
+          (let [active-instances (get-in (service-settings waiter-url service-id :cookies cookies)
+                                         [:instances :active-instances])
+                log-url (:log-url (first active-instances))
+                _ (log/debug "Log Url Active:" log-url)
+                make-request-fn (fn [url] (make-request url "" :verbose true))
+                {:keys [body] :as logs-response} (make-request-fn log-url)
+                _ (assert-response-status logs-response 200)
+                _ (log/debug "Response body:" body)
+                log-files-list (walk/keywordize-keys (json/read-str body))
+                stdout-file-link (:url (first (filter #(= (:name %) "stdout") log-files-list)))
+                stderr-file-link (:url (first (filter #(= (:name %) "stderr") log-files-list)))]
+            (is (every? #(string/includes? body %) ["stderr" "stdout"])
+                (str "Directory listing is missing entries: stderr and stdout, got response: " logs-response))
+            (doseq [file-link [stderr-file-link stdout-file-link]]
+              (if (string/starts-with? (str file-link) "http")
+                (assert-response-status (make-request-fn file-link) 200)
+                (log/warn "test-s3-logs did not verify file link:" file-link))))
+          (delete-service waiter-url service-id)
+          ;; Test that the killed instances' logs were persisted to S3.
+          ;; This portion of the test logic was modified from the active-instances tests above.
+          (log/info "waiting for s3 logs to appear")
+          (let [log-bucket-url (k8s-log-bucket-url waiter-url)]
+            (is (wait-for
+                  #(let [killed-instances (get-in (service-settings waiter-url service-id :cookies cookies)
+                                                  [:instances :killed-instances])
+                         log-url (:log-url (first killed-instances))]
+                     (string/starts-with? log-url log-bucket-url))
+                  :interval 1 :timeout 15)
+                (str "Log URL never pointed to S3 bucket " log-bucket-url)))
+          (let [killed-instances (get-in (service-settings waiter-url service-id :cookies cookies)
+                                         [:instances :killed-instances])
+                log-url (:log-url (first killed-instances))
+                _ (log/debug "Log Url Killed:" log-url)
+                make-request-fn (fn [url] (make-request url "" :verbose true))
+                {:keys [body] :as logs-response} (make-request-fn log-url)
+                _ (assert-response-status logs-response 200)
+                _ (log/debug "Response body:" body)
+                log-files-list (walk/keywordize-keys (json/read-str body))
+                stdout-file-link (:url (first (filter #(= (:name %) "stdout") log-files-list)))
+                stderr-file-link (:url (first (filter #(= (:name %) "stderr") log-files-list)))]
+            (is (every? #(string/includes? body %) ["stderr" "stdout"])
+                (str "Directory listing is missing entries: stderr and stdout, got response: " logs-response))
+            (doseq [file-link [stderr-file-link stdout-file-link]]
+              (if (string/starts-with? (str file-link) "http")
+                (assert-response-status (make-request-fn file-link) 200)
+                (log/warn "test-s3-logs did not verify file link:" file-link)))))))))
diff --git a/waiter/src/waiter/scheduler/kubernetes.clj b/waiter/src/waiter/scheduler/kubernetes.clj
index 5b287dffb..106434fa4 100644
--- a/waiter/src/waiter/scheduler/kubernetes.clj
+++ b/waiter/src/waiter/scheduler/kubernetes.clj
@@ -135,6 +135,14 @@
         service-id (k8s-object->service-id pod)]
     (str service-id \. pod-name \- restart-count))))
 
+(defn- unpack-instance-id
+  "Extract the service-id, pod name, and pod restart-number from an instance-id."
+  [instance-id]
+  (let [[_ service-id pod-name restart-number] (re-find #"^([-a-z0-9]+)\.([-a-z0-9]+)-(\d+)$" instance-id)]
+    {:service-id service-id
+     :pod-name pod-name
+     :restart-number restart-number}))
+
 (defn- log-dir-path [namespace restart-count]
   "Build log directory path string for a container run."
   (str "/home/" namespace "/r" restart-count))
@@ -444,6 +452,7 @@
            daemon-state
            fileserver
            http-client
+           log-bucket-url
            max-patch-retries
            max-name-length
            pod-base-port
@@ -543,14 +552,13 @@
                            :message "Error while scaling waiter service"})))
 
   (retrieve-directory-content
-    [{:keys [http-client] {:keys [port scheme]} :fileserver}
-     _ instance-id host browse-path]
+    [{:keys [http-client log-bucket-url service-id->service-description-fn watch-state]
+      {:keys [port scheme]} :fileserver}
+     service-id instance-id host browse-path]
     (let [auth-str @k8s-api-auth-str
          headers (when auth-str {"Authorization" auth-str})
-          instance-restart-number (->> (string/last-index-of instance-id \-)
-                                       inc
-                                       (subs instance-id))
-          instance-base-dir (str "/r" instance-restart-number)
+          {:keys [pod-name restart-number]} (unpack-instance-id instance-id)
+          instance-base-dir (str "/r" restart-number)
          browse-path (if (string/blank? browse-path)
                        "/"
                        browse-path)
          browse-path (cond-> browse-path
                        (not (string/ends-with? browse-path "/"))
                        (str "/")
                        (not (string/starts-with?
browse-path "/"))
                        (->> (str "/")))
-          target-url (str scheme "://" host ":" port instance-base-dir browse-path)]
-      (when port
+          run-as-user (-> service-id
+                          service-id->service-description-fn
+                          service-description->namespace)
+          pod (get-in @watch-state [:service-id->pod-id->pod service-id pod-name])
+          target-url (if pod
+                       ;; the pod is live: try accessing logs through the fileserver sidecar
+                       (when port
+                         (str scheme "://" host ":" port instance-base-dir browse-path))
+                       ;; the pod is not live: try accessing logs through S3
+                       (when log-bucket-url
+                         (str log-bucket-url "/" run-as-user "/" service-id "/"
+                              pod-name instance-base-dir browse-path)))]
+      (when target-url
        (ss/try+
          (let [result (http-utils/http-request
                         http-client
@@ -586,7 +605,9 @@
              :syncer (retrieve-syncer-state-fn)})
 
   (validate-service
     [_ service-id]
-    (let [{:strs [run-as-user]} (service-id->service-description-fn service-id)]
+    (let [run-as-user (-> service-id
+                          service-id->service-description-fn
+                          service-description->namespace)]
      (authz/check-user authorizer run-as-user service-id))))
 
 (defn default-replicaset-builder
@@ -597,19 +618,32 @@
   {:strs [backend-proto cmd cpus grace-period-secs health-check-interval-secs
           health-check-max-consecutive-failures mem min-instances ports run-as-user]
    :as service-description}
-   {:keys [default-container-image] :as context}]
+   {:keys [default-container-image log-bucket-url] :as context}]
  (let [work-path (str "/home/" run-as-user)
        home-path (str work-path "/latest")
        base-env (scheduler/environment service-id service-description
                                        service-id->password-fn home-path)
+        ;; We include the default log-bucket-sync-secs value in the total-sigkill-delay-secs
+        ;; delay iff the log-bucket-url setting was given in the scheduler config.
+        log-bucket-sync-secs (if log-bucket-url (:log-bucket-sync-secs context) 0)
+        total-sigkill-delay-secs (+ pod-sigkill-delay-secs log-bucket-sync-secs)
        ;; Make $PORT0 value pseudo-random to ensure clients can't hardcode it.
        ;; Helps maintain compatibility with Marathon, where port assignment is dynamic.
        port0 (-> service-id hash (mod 100) (* 10) (+ pod-base-port))
        env (into [;; We set these two "MESOS_*" variables to improve interoperability.
                   ;; New clients should prefer using WAITER_SANDBOX.
                   {:name "MESOS_DIRECTORY" :value home-path}
-                   {:name "MESOS_SANDBOX" :value home-path}]
+                   {:name "MESOS_SANDBOX" :value home-path}
+                   ;; Number of seconds to wait after receiving a SIGTERM
+                   ;; before sending a SIGKILL to the user's process.
+                   ;; This is handled by the waiter-init script,
+                   ;; separately from the pod's grace period,
+                   ;; in order to provide extra time for logs to sync to an S3 bucket.
+                   {:name "WAITER_GRACE_SECS" :value (str pod-sigkill-delay-secs)}]
            (concat
+              (when log-bucket-url
+                [{:name "WAITER_LOG_BUCKET_URL"
+                  :value (str log-bucket-url "/" run-as-user "/" service-id)}])
              (for [[k v] base-env]
                {:name k :value v})
              (for [i (range ports)]
@@ -663,7 +697,7 @@
            :workingDir work-path}]
          :volumes [{:name "user-home"
                     :emptyDir {}}]
-          :terminationGracePeriodSeconds pod-sigkill-delay-secs}}}}
+          :terminationGracePeriodSeconds total-sigkill-delay-secs}}}}
      ;; Optional fileserver sidecar container
      (integer?
(:port fileserver)) (update-in @@ -672,8 +706,7 @@ (let [{:keys [cmd image port] {:keys [cpu mem]} :resources} fileserver memory (str mem "Mi")] {:command cmd - :env [{:name "WAITER_FILESERVER_PORT" - :value (str port)}] + :env [{:name "WAITER_FILESERVER_PORT" :value (str port)}] :image image :imagePullPolicy "IfNotPresent" :name "waiter-fileserver" @@ -861,10 +894,11 @@ (defn kubernetes-scheduler "Returns a new KubernetesScheduler with the provided configuration. Validates the configuration against kubernetes-scheduler-schema and throws if it's not valid." - [{:keys [authentication authorizer cluster-name http-options max-patch-retries max-name-length - pod-base-port pod-sigkill-delay-secs pod-suffix-length replicaset-api-version replicaset-spec-builder - scheduler-name scheduler-state-chan scheduler-syncer-interval-secs service-id->service-description-fn - service-id->password-fn url start-scheduler-syncer-fn watch-retries] + [{:keys [authentication authorizer cluster-name http-options log-bucket-sync-secs log-bucket-url + max-patch-retries max-name-length pod-base-port pod-sigkill-delay-secs pod-suffix-length + replicaset-api-version replicaset-spec-builder scheduler-name scheduler-state-chan + scheduler-syncer-interval-secs service-id->service-description-fn service-id->password-fn + url start-scheduler-syncer-fn watch-retries] {fileserver-port :port fileserver-scheme :scheme :as fileserver} :fileserver}] {:pre [(schema/contains-kind-sub-map? authorizer) (or (nil? fileserver-port) @@ -873,6 +907,8 @@ (re-matches #"https?" fileserver-scheme) (utils/pos-int? (:socket-timeout http-options)) (utils/pos-int? (:conn-timeout http-options)) + (and (number? log-bucket-sync-secs) (<= 0 log-bucket-sync-secs 300)) + (or (nil? log-bucket-url) (some? (io/as-url log-bucket-url))) (utils/non-neg-int? max-patch-retries) (utils/pos-int? max-name-length) (not (string/blank? cluster-name)) @@ -896,13 +932,16 @@ (utils/assoc-if-absent :user-agent "waiter-k8s") http-utils/http-client-factory) service-id->failed-instances-transient-store (atom {}) + replicaset-spec-builder-ctx (assoc replicaset-spec-builder + :log-bucket-sync-secs log-bucket-sync-secs + :log-bucket-url log-bucket-url) replicaset-spec-builder-fn (let [f (-> replicaset-spec-builder :factory-fn utils/resolve-symbol deref)] (assert (fn? f) "ReplicaSet spec function must be a Clojure fn") (fn [scheduler service-id service-description] - (f scheduler service-id service-description replicaset-spec-builder))) + (f scheduler service-id service-description replicaset-spec-builder-ctx))) watch-options (cond-> default-watch-options (some? 
watch-retries) (assoc :watch-retries watch-retries)) @@ -928,6 +967,7 @@ daemon-state fileserver http-client + log-bucket-url max-patch-retries max-name-length pod-base-port diff --git a/waiter/src/waiter/settings.clj b/waiter/src/waiter/settings.clj index 812383269..e50207f2c 100644 --- a/waiter/src/waiter/settings.clj +++ b/waiter/src/waiter/settings.clj @@ -176,6 +176,8 @@ (log/info "reading settings from file:" config-file-path) (let [edn-readers {:readers {'config/regex (fn [expr] (re-pattern expr)) 'config/env #(env % config-file-path) + 'config/env-default (fn [[var-name default]] + (or (System/getenv var-name) default)) 'config/env-int #(Integer/parseInt (env % config-file-path))}} settings (edn/read-string edn-readers (slurp config-file-path))] (log/info "configured settings:\n" (with-out-str (clojure.pprint/pprint settings))) @@ -299,7 +301,7 @@ :mesos-slave-port 5051 :search-interval-days 10} :kubernetes {; Default values are not provided below for the following keys: - ; :authentication [:fileserver :port] :url + ; :authentication [:fileserver :port] :log-bucket-url :url :factory-fn 'waiter.scheduler.kubernetes/kubernetes-scheduler :authorizer {:kind :default :default {:factory-fn 'waiter.authorization/noop-authorizer}} @@ -310,6 +312,7 @@ :scheme "http"} :http-options {:conn-timeout 10000 :socket-timeout 10000} + :log-bucket-sync-secs 180 :max-patch-retries 5 :max-name-length 63 :pod-base-port 31000 diff --git a/waiter/src/waiter/util/client_tools.clj b/waiter/src/waiter/util/client_tools.clj index 442464ff2..a1490087b 100644 --- a/waiter/src/waiter/util/client_tools.clj +++ b/waiter/src/waiter/util/client_tools.clj @@ -484,6 +484,11 @@ [waiter-url & {:keys [verbose] :or {verbose false}}] (setting waiter-url [:scheduler-config :marathon :url] :verbose verbose)) +(defn k8s-log-bucket-url + "Returns the S3 bucket url for K8s service log backups" + [waiter-url & {:keys [verbose] :or {verbose false}}] + (setting waiter-url [:scheduler-config :kubernetes :log-bucket-url] :verbose verbose)) + (defn num-tasks-running [waiter-url service-id & {:keys [verbose prev-tasks-running] :or {verbose false prev-tasks-running -1}}] (let [http-options {:conn-timeout 10000, :socket-timeout 10000, :spnego-auth use-spnego} marathon-url (marathon-url waiter-url :verbose verbose) diff --git a/waiter/test/waiter/scheduler/kubernetes_test.clj b/waiter/test/waiter/scheduler/kubernetes_test.clj index d433583c8..de3c4bd8d 100644 --- a/waiter/test/waiter/scheduler/kubernetes_test.clj +++ b/waiter/test/waiter/scheduler/kubernetes_test.clj @@ -51,6 +51,8 @@ :cluster-name "waiter" :fileserver {:port 9090 :scheme "http"} + :log-bucket-sync-secs 60 + :log-bucket-url nil :max-patch-retries 5 :max-name-length 63 :pod-base-port 8080 @@ -664,11 +666,12 @@ (deftest test-retrieve-directory-content (let [service-id "test-service-id" - instance-id "test-service-instance-id-0" + instance-id "test-service-id.instance-id-0" instance-base-dir "/r0" + pod-name "instance-id" host "host.local" path "/some/path/" - dummy-scheduler (make-dummy-scheduler [service-id]) + {:keys [watch-state] :as dummy-scheduler} (make-dummy-scheduler [service-id]) port (get-in dummy-scheduler [:fileserver :port]) make-file (fn [file-name size] {:url (str "http://" host ":" port instance-base-dir path file-name) @@ -695,6 +698,7 @@ (make-dir "x") (make-dir "y") (make-dir "z")]}]] + (swap! 
watch-state assoc-in [:service-id->pod-id->pod service-id pod-name] ::pod) (doseq [{:keys [description expected]} inputs] (testing description (let [actual (with-redefs [hu/http-request (constantly (strip-links expected))] @@ -725,6 +729,8 @@ :watch-state (atom nil) :http-options {:conn-timeout 10000 :socket-timeout 10000} + :log-bucket-sync-secs 60 + :log-bucket-url nil :max-patch-retries 5 :max-name-length 63 :pod-base-port 8080
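
As a quick end-to-end sanity check of the changes above, something like the following shell session should work against the local cloudserver instance (the user, service-id, and pod name below are hypothetical; the URL layout mirrors what waiter-init and retrieve-directory-content construct):

    # Start the S3 test server and create the public waiter-service-logs bucket
    waiter/bin/ci/s3-server-setup.sh

    # Point the Kubernetes scheduler at the bucket (read via #config/env-default)
    export WAITER_S3_BUCKET=http://localhost:8888/waiter-service-logs

    # After a pod is deleted, its logs should be browsable in the bucket under
    # <run-as-user>/<service-id>/<pod-name>/rN/, e.g.:
    curl "$WAITER_S3_BUCKET/testuser/my-service-abc123/my-service-abc123-xyz99-0/r0/index.json"
    curl "$WAITER_S3_BUCKET/testuser/my-service-abc123/my-service-abc123-xyz99-0/r0/stdout"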