From 44fc9b3cd4c2fa7be2d179267dde045f366e94a0 Mon Sep 17 00:00:00 2001 From: Scott Crosby Date: Tue, 11 Dec 2018 08:35:09 -0600 Subject: [PATCH] Outgoing/global launch limit (#1038) --- .travis.yml | 2 +- integration/tests/cook/test_multi_user.py | 59 ++++++++++- integration/travis/run_integration.sh | 31 +----- .../travis/run_integration_ratelimit.sh | 100 ++++++++++++++++++ .../travis/scheduler_travis_config.edn | 3 + scheduler/config.edn | 5 +- scheduler/src/cook/config.clj | 3 +- scheduler/src/cook/mesos/constraints.clj | 26 ++++- scheduler/src/cook/mesos/scheduler.clj | 1 + scheduler/src/cook/rate_limit.clj | 17 +++ .../test/cook/test/mesos/constraints.clj | 1 + .../test/cook/test/mesos/fenzo_utils.clj | 1 + scheduler/test/cook/test/mesos/scheduler.clj | 18 +++- scheduler/test/cook/test/testutil.clj | 2 +- 14 files changed, 230 insertions(+), 39 deletions(-) create mode 100755 integration/travis/run_integration_ratelimit.sh diff --git a/.travis.yml b/.travis.yml index b953718107..ab6a0d4062 100644 --- a/.travis.yml +++ b/.travis.yml @@ -60,7 +60,7 @@ matrix: services: docker install: sudo ./travis/install_mesos.sh before_script: cd integration && ./travis/prepare_integration.sh - script: ./travis/run_integration.sh --pools=off --auth=http-basic --job-launch-rate-limit=on + script: ./travis/run_integration_ratelimit.sh - name: 'Cook Scheduler Simulator tests' services: docker diff --git a/integration/tests/cook/test_multi_user.py b/integration/tests/cook/test_multi_user.py index 6bfc7f2f84..c00c80a4ae 100644 --- a/integration/tests/cook/test_multi_user.py +++ b/integration/tests/cook/test_multi_user.py @@ -263,7 +263,7 @@ def test_rate_limit_launching_jobs(self): pytest.skip("Can't test job launch rate limit without launch rate limit set.") # Allow an environmental variable override. 
- name = os.getenv('COOK_LAUNCH_RATE_LIMIT_NAME') + name = os.getenv('COOK_LAUNCH_RATE_LIMIT_USER_NAME') if name is not None: user = self.user_factory.user_class(name) else: @@ -323,6 +323,63 @@ def is_rate_limit_triggered(_): finally: util.kill_jobs(self.cook_url, job_uuids) + # Note that subsequent runs of this test under the same user can fail if sufficient time has not + # passed; the subsequent run will have used up the rate limit quota and it will need time to recharge. + def test_global_rate_limit_launching_jobs(self): + settings = util.settings(self.cook_url) + if settings['rate-limit']['global-job-launch'] is None: + pytest.skip("Can't test job launch rate limit without launch rate limit set.") + + # Allow an environmental variable override. + name = os.getenv('COOK_LAUNCH_RATE_LIMIT_USER_NAME') + if name is not None: + user = self.user_factory.user_class(name) + else: + user = self.user_factory.new_user() + + if not settings['rate-limit']['global-job-launch']['enforce?']: + pytest.skip("Enforcing must be on for test to run") + bucket_size = settings['rate-limit']['global-job-launch']['bucket-size'] + token_rate = settings['rate-limit']['global-job-launch']['tokens-replenished-per-minute'] + # In some environments, e.g., minimesos, we can only launch so many concurrent jobs. 
+ if token_rate < 5 or token_rate > 20: + pytest.skip( + "Global job launch rate limit test is only validated to reliably work correctly with certain token rates.") + if bucket_size < 10 or bucket_size > 20: + pytest.skip( + "Global job launch rate limit test is only validated to reliably work correctly with certain token bucket sizes.") + with user: + job_uuids = [] + try: + jobspec = {"command": "sleep 240", 'cpus': 0.03, 'mem': 32} + + self.logger.info(f'Submitting initial batch of {bucket_size-1} jobs') + initial_uuids, initial_response = util.submit_jobs(self.cook_url, jobspec, bucket_size - 1) + job_uuids.extend(initial_uuids) + self.assertEqual(201, initial_response.status_code, msg=initial_response.content) + + def submit_jobs(): + self.logger.info(f'Submitting subsequent batch of {bucket_size-1} jobs') + subsequent_uuids, subsequent_response = util.submit_jobs(self.cook_url, jobspec, bucket_size - 1) + job_uuids.extend(subsequent_uuids) + self.assertEqual(201, subsequent_response.status_code, msg=subsequent_response.content) + + def is_rate_limit_triggered(_): + jobs1 = util.query_jobs(self.cook_url, True, uuid=job_uuids).json() + running_jobs = [j for j in jobs1 if j['status'] == 'running'] + waiting_jobs = [j for j in jobs1 if j['status'] == 'waiting'] + self.logger.debug(f'There are {len(waiting_jobs)} waiting jobs') + return len(waiting_jobs) > 0 and len(running_jobs) >= bucket_size + + util.wait_until(submit_jobs, is_rate_limit_triggered,120000,5000) + jobs2 = util.query_jobs(self.cook_url, True, uuid=job_uuids).json() + running_jobs = [j for j in jobs2 if j['status'] == 'running'] + self.assertGreaterEqual(len(running_jobs), bucket_size) + self.assertLessEqual(len(running_jobs), bucket_size+4) + finally: + util.kill_jobs(self.cook_url, job_uuids) + + def trigger_preemption(self, pool): """ Triggers preemption on the provided pool (which can be None) by doing the following: diff --git a/integration/travis/run_integration.sh 
b/integration/travis/run_integration.sh index 5bd2e0ab44..37e3f0bd9c 100755 --- a/integration/travis/run_integration.sh +++ b/integration/travis/run_integration.sh @@ -13,7 +13,6 @@ COOK_AUTH=one-user COOK_EXECUTOR=mesos COOK_POOLS=on CONFIG_FILE=scheduler_travis_config.edn -JOB_LAUNCH_RATE_LIMIT=off while (( $# > 0 )); do case "$1" in @@ -29,10 +28,6 @@ while (( $# > 0 )); do COOK_POOLS="${1#--pools=}" shift ;; - --job-launch-rate-limit=*) - JOB_LAUNCH_RATE_LIMIT="${1#--job-launch-rate-limit=}" - shift - ;; *) echo "Unrecognized option: $1" exit 1 @@ -112,24 +107,6 @@ case "$COOK_POOLS" in exit 1 esac -case "$JOB_LAUNCH_RATE_LIMIT" in - on) - # Note: Carefully chosen for test_rate_limit_launching_jobs unit test. - export JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE=10 - export JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE=5 - echo "Job launch rate limit turned on" - ;; - off) - # Note: Wide enough that we're unlikely to hit these in testing. - export JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE=10000 - export JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE=10000 - echo "Job launch rate limit turned off" - ;; - *) - echo "Unrecognized job-launch-rate-limit toggle (should be on/off): $JOB_LAUNCH_RATE_LIMIT" - exit 1 -esac - pip install flask export DATA_LOCAL_PORT=35847 export DATA_LOCAL_SERVICE="http://localhost:${DATA_LOCAL_PORT}" @@ -187,12 +164,8 @@ export COOK_SLAVE_URL=http://localhost:12323 export COOK_MESOS_LEADER_URL=${MINIMESOS_MASTER} { echo "Using Mesos leader URL: ${COOK_MESOS_LEADER_URL}" - if [ "$JOB_LAUNCH_RATE_LIMIT" = off ]; then - pytest -n4 -v --color=no --timeout-method=thread --boxed -m "not serial" || test_failures=true - pytest -n0 -v --color=no --timeout-method=thread --boxed -m "serial" || test_failures=true - else - pytest -n0 -v --color=no --timeout-method=thread --boxed -m multi_user tests/cook/test_multi_user.py -k test_rate_limit_launching_jobs || test_failures=true - fi + pytest -n4 -v --color=no --timeout-method=thread --boxed -m "not serial" || 
test_failures=true + pytest -n0 -v --color=no --timeout-method=thread --boxed -m "serial" || test_failures=true } &> >(tee ./log/pytest.log) diff --git a/integration/travis/run_integration_ratelimit.sh b/integration/travis/run_integration_ratelimit.sh new file mode 100755 index 0000000000..2cbd9ba533 --- /dev/null +++ b/integration/travis/run_integration_ratelimit.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +# Usage: ./run_integration_ratelimit.sh (takes no options) + +set -ev + +export PROJECT_DIR=`pwd` + +CONFIG_FILE=scheduler_travis_config.edn + +function wait_for_cook { + COOK_PORT=${1:-12321} + while ! curl -s localhost:${COOK_PORT} >/dev/null; + do + echo "$(date +%H:%M:%S) Cook is not listening on ${COOK_PORT} yet" + sleep 2.0 + done + echo "$(date +%H:%M:%S) Connected to Cook on ${COOK_PORT}!" + curl -s localhost:${COOK_PORT}/info + echo +} +export -f wait_for_cook + +# Start minimesos +cd ${TRAVIS_BUILD_DIR}/travis +./minimesos up +$(./minimesos info | grep MINIMESOS) +export COOK_ZOOKEEPER="${MINIMESOS_ZOOKEEPER_IP}:2181" +export MINIMESOS_ZOOKEEPER=${MINIMESOS_ZOOKEEPER%;} +export MINIMESOS_MASTER=${MINIMESOS_MASTER%;} + +SCHEDULER_DIR=${TRAVIS_BUILD_DIR}/scheduler +COOK_DATOMIC_URI_1=datomic:mem://cook-jobs + +# Generate SSL certificate +COOK_KEYSTORE_PATH=${SCHEDULER_DIR}/cook.p12 +keytool -genkeypair -keystore ${COOK_KEYSTORE_PATH} -storetype PKCS12 -storepass cookstore -dname "CN=cook, OU=Cook Developers, O=Two Sigma Investments, L=New York, ST=New York, C=US" -keyalg RSA -keysize 2048 +export COOK_KEYSTORE_PATH=${COOK_KEYSTORE_PATH} + +mkdir ${SCHEDULER_DIR}/log + +cd ${SCHEDULER_DIR} + +# Start two cook schedulers. +export COOK_HTTP_BASIC_AUTH=true +export COOK_EXECUTOR_COMMAND="" +## We launch two instances, with different configurations for the different unit tests. 
+## on travis, ports on 172.17.0.1 are bindable from the host OS, and are also +## available for processes inside minimesos containers to connect to +# Start one cook listening on port 12321, this will be the master of the "cook-framework-1" framework +export GLOBAL_JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE=10000 +export GLOBAL_JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE=10000 +export JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE=10 +export JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE=5 +LIBPROCESS_IP=172.17.0.1 COOK_DATOMIC="${COOK_DATOMIC_URI_1}" COOK_PORT=12321 COOK_SSL_PORT=12322 COOK_COOKEEPER_LOCAL=true COOK_COOKEEPER_LOCAL_PORT=5291 COOK_FRAMEWORK_ID=cook-framework-1 COOK_LOGFILE="log/cook-12321.log" COOK_DEFAULT_POOL=${DEFAULT_POOL} lein run ${PROJECT_DIR}/travis/${CONFIG_FILE} & +# Start a second cook listening on port 22321, this will be the master of the "cook-framework-2" framework +export JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE=10000 +export JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE=10000 +export GLOBAL_JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE=10 +export GLOBAL_JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE=5 +LIBPROCESS_IP=172.17.0.1 COOK_DATOMIC="${COOK_DATOMIC_URI_1}" COOK_PORT=22321 COOK_SSL_PORT=22322 COOK_ZOOKEEPER_LOCAL=true COOK_ZOOKEEPER_LOCAL_PORT=4291 COOK_FRAMEWORK_ID=cook-framework-2 COOK_LOGFILE="log/cook-22321.log" lein run ${PROJECT_DIR}/travis/${CONFIG_FILE} & + +# Wait for the cooks to be listening +timeout 180s bash -c "wait_for_cook 12321" || curl_error=true +if [ "$curl_error" = true ]; then + echo "$(date +%H:%M:%S) Timed out waiting for cook to start listening" + ${TRAVIS_BUILD_DIR}/travis/upload_logs.sh + exit 1 +fi + +timeout 180s bash -c "wait_for_cook 22321" || curl_error=true +if [ "$curl_error" = true ]; then + echo "$(date +%H:%M:%S) Timed out waiting for cook to start listening" + ${TRAVIS_BUILD_DIR}/travis/upload_logs.sh + exit 1 +fi + +# Ensure the Cook Scheduler CLI is available +command -v cs + +# Run the integration tests +cd ${PROJECT_DIR} 
+export COOK_MESOS_LEADER_URL=${MINIMESOS_MASTER} +{ + echo "Using Mesos leader URL: ${COOK_MESOS_LEADER_URL}" + export COOK_SCHEDULER_URL=http://localhost:12321 + pytest -n0 -v --color=no --timeout-method=thread --boxed -m multi_user tests/cook/test_multi_user.py -k test_rate_limit_launching_jobs || test_failures=true + + + export COOK_SCHEDULER_URL=http://localhost:22321 + pytest -n0 -v --color=no --timeout-method=thread --boxed -m multi_user tests/cook/test_multi_user.py -k test_global_rate_limit_launching_jobs || test_failures=true + } &> >(tee ./log/pytest.log) + + +# If there were failures, then we should save the logs +if [ "$test_failures" = true ]; then + echo "Uploading logs..." + ${TRAVIS_BUILD_DIR}/travis/upload_logs.sh + exit 1 +fi diff --git a/integration/travis/scheduler_travis_config.edn b/integration/travis/scheduler_travis_config.edn index 0aee44d64a..f1f67ffe04 100644 --- a/integration/travis/scheduler_travis_config.edn +++ b/integration/travis/scheduler_travis_config.edn @@ -34,6 +34,9 @@ :rate-limit {:expire-minutes 120 ; Expire unused rate limit entries after 2 hours. ; Keep these job-launch and job-submission values as they are for integration tests. Making them smaller can cause ; spurious failures, and making them larger will cause the rate-limit integration test to skip itself. + :global-job-launch {:bucket-size #config/env-int-default ["GLOBAL_JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE" 10000] + :enforce? true + :tokens-replenished-per-minute #config/env-int-default ["GLOBAL_JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE" 10000]} :job-launch {:bucket-size #config/env-int-default ["JOB_LAUNCH_RATE_LIMIT_BUCKET_SIZE" 10000] :enforce? 
true :tokens-replenished-per-minute #config/env-int-default ["JOB_LAUNCH_RATE_LIMIT_REPLENISHED_PER_MINUTE" 10000]} diff --git a/scheduler/config.edn b/scheduler/config.edn index 6566b038aa..0ae8de96b0 100644 --- a/scheduler/config.edn +++ b/scheduler/config.edn @@ -48,9 +48,12 @@ :keystore-path #config/env "COOK_KEYSTORE_PATH" :keystore-type "pkcs12" :keystore-pass "cookstore"} - :rate-limit {:expire-minutes 120 ; Expire unused rate limit entries after 2 hours. + :rate-limit {:expire-minutes 1200 ; Expire unused rate limit entries after 20 hours. ; Keep these job-launch and job-submission values as they are for integration tests. Making them smaller can cause ; spurious failures, and making them larger will cause test_rate_limit_launching_jobs to skip itself. + :global-job-launch {:bucket-size 10000 + :enforce? true + :tokens-replenished-per-minute 5000} :job-launch {:bucket-size 10000 :enforce? true :tokens-replenished-per-minute 5000} diff --git a/scheduler/src/cook/config.clj b/scheduler/src/cook/config.clj index f6e42d1a9b..971024d984 100644 --- a/scheduler/src/cook/config.clj +++ b/scheduler/src/cook/config.clj @@ -204,10 +204,11 @@ ((util/lazy-load-var 'cook.impersonation/create-impersonation-middleware) impersonators) {:json-value "config-impersonation"}))) :rate-limit (fnk [[:config {rate-limit nil}]] - (let [{:keys [expire-minutes user-limit-per-m job-submission job-launch] + (let [{:keys [expire-minutes user-limit-per-m global-job-launch job-submission job-launch] :or {expire-minutes 120 user-limit-per-m 600}} rate-limit] {:expire-minutes expire-minutes + :global-job-launch global-job-launch :job-submission job-submission :job-launch job-launch :user-limit (->UserRateLimit :user-limit user-limit-per-m (t/minutes 1))})) diff --git a/scheduler/src/cook/mesos/constraints.clj b/scheduler/src/cook/mesos/constraints.clj index 82e365e2fd..d6fe5a7086 100644 --- a/scheduler/src/cook/mesos/constraints.clj +++ b/scheduler/src/cook/mesos/constraints.clj @@ -23,6 
+23,7 @@ [cook.mesos.data-locality :as dl] [cook.mesos.group :as group] [cook.mesos.util :as util] + [cook.rate-limit :as ratelimit] [swiss.arrows :refer :all]) (:import com.netflix.fenzo.VirtualMachineLease java.util.Date)) @@ -234,6 +235,21 @@ (when (< 0 max-expected-runtime) (->estimated-completion-constraint expected-end-time host-lifetime-mins)))))) +(defn build-launch-max-tasks-constraint + "This returns a Fenzo hard constraint that ensures that we don't match more than a given number of tasks per cycle." + [] + (let [enforcing? (ratelimit/enforce? ratelimit/global-job-launch-rate-limiter) + max-tasks (ratelimit/get-token-count! ratelimit/global-job-launch-rate-limiter ratelimit/global-job-launch-rate-limiter-key)] + (if enforcing? + (reify com.netflix.fenzo.ConstraintEvaluator + (getName [_] "launch_max_tasks") + (evaluate [_ _ _ task-tracker-state] + (let [num-assigned (-> task-tracker-state .getAllCurrentlyAssignedTasks .size)] + (com.netflix.fenzo.ConstraintEvaluator$Result. + (< num-assigned max-tasks) + (str "Hit the global rate limit"))))) + nil))) + (def job-constraint-constructors [build-novel-host-constraint build-gpu-host-constraint build-user-defined-constraint build-estimated-completion-constraint build-data-locality-constraint]) (defn fenzoize-job-constraint @@ -260,10 +276,12 @@ (defn make-fenzo-job-constraints "Returns a sequence of all the constraints for 'job', in Fenzo-compatible format." [job] - (->> job-constraint-constructors - (map (fn [constructor] (constructor job))) - (remove nil?) - (map fenzoize-job-constraint))) + (let [launch-max-tasks-constraint (build-launch-max-tasks-constraint)] + (cond-> (->> job-constraint-constructors + (map (fn [constructor] (constructor job))) + (remove nil?) 
+ (map fenzoize-job-constraint)) + launch-max-tasks-constraint (conj (build-launch-max-tasks-constraint))))) (defn build-rebalancer-reservation-constraint "Constructs a rebalancer-reservation-constraint" diff --git a/scheduler/src/cook/mesos/scheduler.clj b/scheduler/src/cook/mesos/scheduler.clj index 9a20848ed0..3aff89f0e8 100644 --- a/scheduler/src/cook/mesos/scheduler.clj +++ b/scheduler/src/cook/mesos/scheduler.clj @@ -707,6 +707,7 @@ matches) (throw e)))) (log/info "Launching" (count task-txns) "tasks") + (ratelimit/spend! ratelimit/global-job-launch-rate-limiter ratelimit/global-job-launch-rate-limiter-key (count task-txns)) (log/debug "Matched tasks" task-txns) ;; This launch-tasks MUST happen after the above transaction in ;; order to allow a transaction failure (due to failed preconditions) diff --git a/scheduler/src/cook/rate_limit.clj b/scheduler/src/cook/rate_limit.clj index f1fc25f567..d64ae35ac5 100644 --- a/scheduler/src/cook/rate_limit.clj +++ b/scheduler/src/cook/rate_limit.clj @@ -55,3 +55,20 @@ (mount/defstate job-launch-rate-limiter :start (create-job-launch-rate-limiter config)) + +(defn create-global-job-launch-rate-limiter + "From the configuration map, extract the keys that set up the global job-launch rate limiter and return + the constructed object. If the :global-job-launch settings are missing or empty, the AllowAllRateLimiter is returned." + [config] + (let [{:keys [settings]} config + {:keys [rate-limit]} settings + {:keys [expire-minutes global-job-launch]} rate-limit] + (if (seq global-job-launch) + (let [{:keys [bucket-size enforce? 
tokens-replenished-per-minute]} global-job-launch] + (rtg/make-token-bucket-filter bucket-size tokens-replenished-per-minute expire-minutes enforce?)) + AllowAllRateLimiter))) + +(mount/defstate global-job-launch-rate-limiter + :start (create-global-job-launch-rate-limiter config)) + +(def global-job-launch-rate-limiter-key "*DEF*") \ No newline at end of file diff --git a/scheduler/test/cook/test/mesos/constraints.clj b/scheduler/test/cook/test/mesos/constraints.clj index 722e5f85bc..862885874f 100644 --- a/scheduler/test/cook/test/mesos/constraints.clj +++ b/scheduler/test/cook/test/mesos/constraints.clj @@ -54,6 +54,7 @@ (deftest test-gpu-constraint + (cook.test.testutil/setup) (let [framework-id #mesomatic.types.FrameworkID{:value "my-framework-id"} gpu-offer #mesomatic.types.Offer{:id #mesomatic.types.OfferID {:value "my-offer-id"} :framework-id framework-id diff --git a/scheduler/test/cook/test/mesos/fenzo_utils.clj b/scheduler/test/cook/test/mesos/fenzo_utils.clj index bdcad7148c..3209876561 100644 --- a/scheduler/test/cook/test/mesos/fenzo_utils.clj +++ b/scheduler/test/cook/test/mesos/fenzo_utils.clj @@ -92,6 +92,7 @@ (deftest test-record-placement-failures + (cook.test.testutil/setup) (let [uri "datomic:mem://test-record-placement-failures" conn (restore-fresh-database! 
uri) job-id (create-dummy-job conn :under-investigation true) diff --git a/scheduler/test/cook/test/mesos/scheduler.clj b/scheduler/test/cook/test/mesos/scheduler.clj index f5c8896a86..c577150f3d 100644 --- a/scheduler/test/cook/test/mesos/scheduler.clj +++ b/scheduler/test/cook/test/mesos/scheduler.clj @@ -343,6 +343,7 @@ (is (= 1000.0 (:mem resources))))) (deftest test-match-offer-to-schedule + (setup) (let [schedule (map #(d/entity (db c) %) [j1 j2 j3 j4]) ; all 1gb 1 cpu offer-maker (fn [cpus mem] [{:resources [{:name "cpus" :type :value-scalar :scalar cpus} @@ -766,6 +767,7 @@ distinct)))))))) (deftest test-attr-equals-host-placement-constraint + (setup) (let [uri "datomic:mem://test-attr-equals-host-placement-constraint" conn (restore-fresh-database! uri) framework-id #mesomatic.types.FrameworkID{:value "my-original-framework-id"} @@ -1661,6 +1663,19 @@ (is (= 1 (count @launched-job-ids-atom))) (is (= #{"job-1"} (set @launched-job-ids-atom)))))) + (with-redefs [rate-limit/job-launch-rate-limiter + (rate-limit/create-job-launch-rate-limiter job-launch-rate-limit-config-for-testing) + rate-limit/get-token-count! (constantly 1)] + (testing "enough offers for all normal jobs, limited by num-considerable of 2, but only one token in global rate limit for one job" + ;; We filter so that fenzo only matches one job, so we should only launch the one job. + (let [num-considerable 2 + offers [offer-1 offer-2 offer-3]] + (is (run-handle-resource-offers! num-considerable offers :normal)) + (is (= :end-marker (async/ @total-spent (+ tokens))))] (testing "enough offers for all normal jobs, limited by num-considerable of 2. Make sure we spend the tokens." @@ -1671,7 +1686,8 @@ (is (= 2 (count @launched-offer-ids-atom))) (is (= 2 (count @launched-job-ids-atom))) (is (= #{"job-1" "job-2"} (set @launched-job-ids-atom))) - (is (= 2 @total-spent)))))) + ; We launch two jobs, this involves spending two tokens on per-user rate limiter and 2 on the global launch rate limiter. 
+ (is (= 4 @total-spent)))))) (testing "enough offers for all normal jobs, limited by quota" (let [num-considerable 1 diff --git a/scheduler/test/cook/test/testutil.clj b/scheduler/test/cook/test/testutil.clj index ecb466c479..a427c25abe 100644 --- a/scheduler/test/cook/test/testutil.clj +++ b/scheduler/test/cook/test/testutil.clj @@ -291,7 +291,7 @@ "Given an optional config map, initializes the config state" [& {:keys [config], :or nil}] (mount/stop) - (mount/start-with-args (merge minimal-config config) #'cook.config/config #'cook.rate-limit/job-launch-rate-limiter))) + (mount/start-with-args (merge minimal-config config) #'cook.config/config #'cook.rate-limit/job-launch-rate-limiter #'cook.rate-limit/global-job-launch-rate-limiter))) (defn wait-for "Invoke predicate every interval (default 10) seconds until it returns true,