Permalink
Browse files

Removes utilization check from rebalancer (#887)

* Removes utilization check from rebalancer

* Removes more occurrences of the config fields

* Logs warnings when the removed config fields are used
  • Loading branch information...
dposada authored and shamsimam committed Jun 18, 2018
1 parent 91dff42 commit 32789813d1454dac093382388c9d4cec1be6ca09
@@ -318,13 +318,10 @@ def retrieve_mesos_url(varname='MESOS_PORT', value='5050'):
mesos_url = os.getenv('COOK_MESOS_LEADER_URL')
if mesos_url is None:
mesos_port = os.getenv(varname, value)
cook_url = retrieve_cook_url()
_wait_for_cook(cook_url)
mesos_master_hosts = settings(cook_url).get('mesos-master-hosts', ['localhost'])
resp = session.get('http://%s:%s/redirect' % (mesos_master_hosts[0], mesos_port), allow_redirects=False)
resp = session.get(f'http://localhost:{mesos_port}/redirect', allow_redirects=False)
if resp.status_code != 307:
raise RuntimeError('Unable to find mesos leader, redirect endpoint returned %d' % resp.status_code)
mesos_url = 'http:%s' % resp.headers['Location']
raise RuntimeError(f'Unable to find mesos leader, redirect endpoint returned {resp.status_code}')
mesos_url = f"http:{resp.headers['Location']}"
logger.info(f'Using mesos url {mesos_url}')
return mesos_url

@@ -30,7 +30,6 @@
:rate-limit {:user-limit-per-m 1000000}
:rebalancer {:dru-scale 1}
:mesos {:master #config/env "MINIMESOS_ZOOKEEPER"
:master-hosts [#config/env "MINIMESOS_MASTER_IP"]
:failover-timeout-ms nil
:leader-path "/cook-scheduler"
:role "cook"
@@ -125,7 +125,6 @@ docker create \
-e "COOK_NREPL_PORT=${COOK_NREPL_PORT}" \
-e "COOK_FRAMEWORK_ID=${COOK_FRAMEWORK_ID}" \
-e "MESOS_MASTER=${ZK}" \
-e "MESOS_MASTER_HOST=${MINIMESOS_MASTER_IP}" \
-e "COOK_ZOOKEEPER=${COOK_ZOOKEEPER}" \
-e "COOK_ZOOKEEPER_LOCAL=${COOK_ZOOKEEPER_LOCAL}" \
-e "COOK_HOSTNAME=${NAME}" \
@@ -65,7 +65,6 @@ export COOK_ZOOKEEPER="${COOK_ZOOKEEPER}"
export COOK_ZOOKEEPER_LOCAL="${COOK_ZOOKEEPER_LOCAL}"
export LIBPROCESS_IP="${MASTER_IP}"
export MESOS_MASTER="${MASTER_IP}:5050"
export MESOS_MASTER_HOST="${MASTER_IP}"
export MESOS_NATIVE_JAVA_LIBRARY="${MESOS_NATIVE_JAVA_LIBRARY}"
export COOK_SSL_PORT="${COOK_SSL_PORT}"
export COOK_KEYSTORE_PATH="${COOK_KEYSTORE_PATH}"
@@ -31,8 +31,7 @@
:mesos {:failover-timeout-ms nil ; When we close the instance of Cook, all its tasks are killed by Mesos
:framework-id #config/env "COOK_FRAMEWORK_ID"
:leader-path "/cook-scheduler"
:master #config/env "MESOS_MASTER"
:master-hosts [#config/env "MESOS_MASTER_HOST"]}
:master #config/env "MESOS_MASTER"}
:nrepl {:enabled? true
:port #config/env-int "COOK_NREPL_PORT"}
:pools {:default "gamma"}
@@ -111,10 +111,6 @@ We'll look at the configurable options in turn:
This option sets the Mesos master connection string.
For example, if you are running Mesos with a Zookeeper node on the local machine (a common development setup), you'd use the connection string `zk://localhost:2181/mesos`.

`:master-hosts`::
This configures the list of hosts on which the Mesos masters are running. For example, `["zk1.example.com", "zk2.example.com", "zk3.example.com"]`. +
When not set explicitly, Mesos master hosts are derived from Mesos master connection string. For example, if the connection string is `"zk://zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181/mesos"`, master hosts will be set to `["zk1.example.com", "zk2.example.com", "zk3.example.com"]`.

`:failover-timeout-ms`::
This option sets the number of milliseconds that Mesos will wait for the Cook framework to reconnect.
In development, you should set this to `nil`, which means that Mesos will treat any disconnection of Cook as the framework ending; this will kill all of Cook's tasks when it disconnects.
@@ -260,9 +256,6 @@ Optionally, you can include a "rebalancer" stanza. If you do, on startup, Cook
:max-preemption::
See the link:rebalancer-config.adoc[Rebalancer documentation]

:min-utilization-threshold::
See the link:rebalancer-config.adoc[Rebalancer documentation]

`:dru-scale`::
This is only used to control the metrics reporting of DRU values. On some clusters,
the DRU's may be so small that when the values are fed to clj-metrics, they are
@@ -19,7 +19,6 @@ In Cook's database, Rebalancer configuration options are facts about the entity
(let [conn (d/connect "(your-cook-database-url)")
db (d/db conn)]
@(d/transact conn [{:db/id :rebalancer/config
:rebalancer.config/min-utilization-threshold 0.0
:rebalancer.config/safe-dru-threshold 0.0
:rebalancer.config/min-dru-diff 0.0000000001
:rebalancer.config/max-preemption 64.0}]))
@@ -38,7 +37,6 @@ Conversely, you can read the current configuration like this:
* safe-dru-threshold: Tasks with a DRU lower than safe-dru-threshold will not be preempted. If safe-dru-threshold is set to 1.0, then tasks that consume resources in aggregate less than the user resource share will not be preempted.
* min-dru-diff: The minimal DRU difference required to make a preemption action. This is also the maximal "unfairness" Rebalancer is willing to tolerate.
* max-preemption: The maximum number of preemptions Rebalancer can make in one cycle.
* min-utilization-threshold: The minimal cluster utilization to trigger rebalancer. The idea is that the rebalancer should only run when the cluster is at high utilization. If the cluster is not at high utilization, its available resources should be used first before we perform any preemption.

=== Configuring user shares

@@ -80,7 +80,7 @@
{:mesos-scheduler (fnk [[:settings fenzo-fitness-calculator fenzo-floor-iterations-before-reset
fenzo-floor-iterations-before-warn fenzo-max-jobs-considered fenzo-scaleback
good-enough-fitness hostname mea-culpa-failure-limit mesos-failover-timeout mesos-framework-name
mesos-gpu-enabled mesos-leader-path mesos-master mesos-master-hosts mesos-principal
mesos-gpu-enabled mesos-leader-path mesos-master mesos-principal
mesos-role mesos-run-as-user offer-incubate-time-ms optimizer progress rebalancer server-port
task-constraints]
curator-framework framework-id mesos-datomic-mult mesos-leadership-atom
@@ -100,7 +100,6 @@
:mesos-role mesos-role
:mesos-framework-name mesos-framework-name
:gpu-enabled? mesos-gpu-enabled})
get-mesos-utilization-fn (partial (util/lazy-load-var 'cook.mesos/get-mesos-utilization) mesos-master-hosts)
trigger-chans ((util/lazy-load-var 'cook.mesos/make-trigger-chans) rebalancer progress optimizer task-constraints)]
(try
(Class/forName "org.apache.mesos.Scheduler")
@@ -113,7 +112,6 @@
:fenzo-fitness-calculator fenzo-fitness-calculator
:good-enough-fitness good-enough-fitness}
:framework-id framework-id
:get-mesos-utilization get-mesos-utilization-fn
:gpu-enabled? mesos-gpu-enabled
:make-mesos-driver-fn make-mesos-driver-fn
:mea-culpa-failure-limit mea-culpa-failure-limit
@@ -267,17 +267,10 @@
(when scheduler
(or (:good-enough-fitness scheduler) 0.8)))
:mesos-master (fnk [[:config {mesos nil}]]
(when (:master-hosts mesos)
(log/warn "The :master-hosts configuration field is no longer used"))
(when mesos
(:master mesos)))
:mesos-master-hosts (fnk [[:config {mesos nil}]]
(when mesos
(if (:master-hosts mesos)
(if (and (sequential? (:master-hosts mesos)) (every? string? (:master-hosts mesos)))
(:master-hosts mesos)
(throw (ex-info ":mesos-master should be a list of hostnames (e.g. [\"host1.example.com\", ...])" {})))
(->> (:master mesos)
(re-seq #"[/|,]?([^/,:]+):\d+")
(mapv second)))))
:mesos-failover-timeout (fnk [[:config {mesos nil}]]
(:failover-timeout-ms mesos))
:mesos-leader-path (fnk [[:config {mesos nil}]]
@@ -332,6 +325,8 @@
user-metrics-interval-seconds)

:rebalancer (fnk [[:config {rebalancer nil}]]
(when (:min-utilization-threshold rebalancer)
(log/warn "The :min-utilization-threshold configuration field is no longer used"))
(merge {:interval-seconds 300
:dru-scale 1.0}
rebalancer))
@@ -358,8 +353,8 @@
:api-only? (fnk [[:config {api-only? false}]]
api-only?)
:estimated-completion-constraint (fnk [[:config {estimated-completion-constraint nil}]]
(merge {:agent-start-grace-period-mins 10}
estimated-completion-constraint))}))
(merge {:agent-start-grace-period-mins 10}
estimated-completion-constraint))}))

(defn read-config
"Given a config file path, reads the config and returns the map"
@@ -118,24 +118,6 @@
(when mesos-principal
[{:principal mesos-principal}]))))

(defn get-mesos-utilization
"Queries the mesos master to get the utilization.
Returns the max of cpu and mem utilization as a decimal (e.g. 0.92)"
[mesos-master-hosts]
;; One metrics-snapshot URL is built per entry in mesos-master-hosts; the
;; port (5050) and path (/metrics/snapshot) are hard-coded here.
(let [mesos-master-urls (map #(str "http://" % ":5050/metrics/snapshot") mesos-master-hosts)
;; get-stats: GET the url, take the response :body and parse it as JSON.
;; some->> stops and yields nil as soon as any step produces nil.
get-stats (fn [url] (some->> url
(http/get)
(:body)
(json/read-str)))
;; NOTE(review): some-<>> looks like the nil-short-circuiting "diamond"
;; threading macro (swiss-arrows) -- confirm against the ns requires.
;; Steps without <> receive the threaded value as their last argument.
utilization (some-<>> mesos-master-urls
(map get-stats)
;; Keep only snapshots whose "master/elected" metric is positive
;; (presumably the currently elected leader -- verify), then take the first.
;; NOTE(review): pos? throws on nil, so a nil snapshot or one without
;; the "master/elected" key would raise here rather than be skipped.
(filter #(pos? (get % "master/elected")))
(first)
;; <> marks where the threaded snapshot map goes in this step.
(select-keys <> ["master/cpus_percent" "master/mem_percent"])
(vals)
;; The larger of the two selected metric values is the result.
(apply max))]
utilization))

(defn make-trigger-chans
"Creates a map of of the trigger channels expected by `start-mesos-scheduler`
Each channel receives chime triggers at particular intervals and it is
@@ -165,8 +147,6 @@
Parameters
make-mesos-driver-fn -- fn, function that accepts a mesos scheduler and framework id
and returns a mesos driver
get-mesos-utilization -- fn, function with no parameters, returns utilization of cluster [0,1]
mesos-master-hosts -- seq[strings], url of mesos masters to query for cluster info
curator-framework -- curator object, object for interacting with zk
mesos-datomic-conn -- datomic conn, connection to datomic db for interacting with datomic
mesos-datomic-mult -- async channel, feed of db writes
@@ -183,7 +163,7 @@
framework-id -- str, the Mesos framework id from the cook settings
fenzo-config -- map, config for fenzo, See scheduler/docs/configuration.adoc for more details
sandbox-syncer-state -- map, representing the sandbox syncer object"
[{:keys [curator-framework fenzo-config framework-id get-mesos-utilization gpu-enabled? make-mesos-driver-fn
[{:keys [curator-framework fenzo-config framework-id gpu-enabled? make-mesos-driver-fn
mea-culpa-failure-limit mesos-datomic-conn mesos-datomic-mult mesos-leadership-atom mesos-pending-jobs-atom
mesos-run-as-user offer-cache offer-incubate-time-ms optimizer-config progress-config rebalancer-config
sandbox-syncer-state server-config task-constraints trigger-chans zk-prefix]}]
@@ -243,7 +223,6 @@
(cook.mesos.rebalancer/start-rebalancer! {:config rebalancer-config
:conn mesos-datomic-conn
:driver driver
:get-mesos-utilization get-mesos-utilization
:offer-cache offer-cache
:pending-jobs-atom mesos-pending-jobs-atom
:rebalancer-reservation-atom rebalancer-reservation-atom
@@ -122,7 +122,6 @@
;;; Rebalancer is willing to tolerate.
;;;
;;; max-preemption: The maximum number of preemptions Rebalancer can make in one cycle.
;;; min-utilization-threshold: The minimal cluster utilization to trigger rebalancer. The idea is that the rebalancer should only run when the cluster is at high utilization. If the cluster is not at high utilization, its available resources should be used first before we perform any preemption.

;;; Before you read the code... Here are some things you should know about
;;;
@@ -498,7 +497,6 @@

(def datomic-params [:max-preemption
:min-dru-diff
:min-utilization-threshold
:safe-dru-threshold])

(defn read-datomic-params
@@ -522,26 +520,23 @@
recognized-params))]))))

(defn start-rebalancer!
[{:keys [config conn driver get-mesos-utilization offer-cache pending-jobs-atom
[{:keys [config conn driver offer-cache pending-jobs-atom
rebalancer-reservation-atom trigger-chan view-incubating-offers]}]
(binding [metrics-dru-scale (:dru-scale config)]
(update-datomic-params-from-config! conn config)
(util/chime-at-ch
trigger-chan
(fn trigger-rebalance-iteration []
(log/info "Rebalance cycle starting")
(let [{:keys [min-utilization-threshold] :as params} (read-datomic-params conn)
utilization (get-mesos-utilization)
host->spare-resources (->> (view-incubating-offers)
(map (fn [v]
[(:hostname v)
(select-keys (keywordize-keys (:resources v))
[:cpus :mem :gpus])]))
(into {}))]
(if (and (seq params)
min-utilization-threshold
(> utilization min-utilization-threshold))
(let [{normal-pending-jobs :normal gpu-pending-jobs :gpu} @pending-jobs-atom]
(let [params (read-datomic-params conn)]
(if (seq params)
(let [host->spare-resources (->> (view-incubating-offers)
(map (fn [v]
[(:hostname v)
(select-keys (keywordize-keys (:resources v))
[:cpus :mem :gpus])]))
(into {}))
{normal-pending-jobs :normal gpu-pending-jobs :gpu} @pending-jobs-atom]
(rebalance! conn driver offer-cache normal-pending-jobs host->spare-resources
rebalancer-reservation-atom
(assoc params :category :normal
@@ -550,49 +545,6 @@
rebalancer-reservation-atom
(assoc params :category :gpu
:compute-pending-job-dru compute-pending-gpu-job-dru)))
(log/info "Skipping rebalancing due to low cluster utilization"
{:mesos-utilization (str utilization)
:min-utilization-threshold (str min-utilization-threshold)}))))
(log/info "Skipping rebalancing because it's not cofigured"))))
{:error-handler (fn [ex] (log/error ex "Rebalance failed"))})
#(async/close! trigger-chan)))

(comment
;; Everything inside this (comment ...) form is REPL scratch code: the form
;; is never evaluated when the namespace is loaded.

; Useful function to simulate preemptions
;; update-task-by-name: looks up an instance of the job called `name` whose
;; status is :instance.status/running and transacts it to failed with the
;; reason `reason-name`, also flagging it as preempted.
(defn update-task-by-name
[name reason-name]
(let [conn (d/connect "datomic:mem://mesos-jobs")
;; Entity id whose :db/ident is :instance.status/running.
running (ffirst (q '[:find ?status
:in $ ?ident
:where
[?status :db/ident ?ident]
] (d/db conn) :instance.status/running))
;; First instance found that belongs to a job named `name` and has the
;; running status; nil if the query returns no rows.
task-eid (ffirst (q '[:find ?inst
:in $ ?name ?status
:where
[?j :job/name ?name]
[?j :job/instance ?inst]
[?inst :instance/status ?status]
] (d/db conn) name running))
]
;; Deref'ing the transact future blocks until the transaction completes.
@(d/transact
conn
[;; The database can become inconsistent if we make multiple calls to :instance/update-state in a single
;; transaction; see the comment in the definition of :instance/update-state for more details
[:instance/update-state task-eid :instance.status/failed [:reason/name reason-name]]
[:db/add task-eid :instance/reason [:reason/name reason-name]]
[:db/add task-eid :instance/preempted? true]
])))

;; Example invocations with two different reason names.
(update-task-by-name "sometask" :unknown)
(update-task-by-name "sometask" :preempted-by-rebalancer)

;; Sets the cpus/mem share for the "default" user.
(let [conn (d/connect "datomic:mem://mesos-jobs")]
(share/set-share! conn "default" :cpus 20.0 :mem 2500000.0))

;; Writes rebalancer parameters onto the :rebalancer/config entity.
(let [conn (d/connect "datomic:mem://mesos-jobs")
db (d/db conn)]
@(d/transact conn [{:db/id :rebalancer/config
:rebalancer.config/max-preemption 64.0
:rebalancer.config/min-dru-diff 0.0000000001
:rebalancer.config/min-utilization-threshold 0.0
:rebalancer.config/safe-dru-threshold 0.0}])))
@@ -739,11 +739,6 @@ for a job. E.g. {:resources {:cpus 4 :mem 3} :constraints {\"unique_host_constra
(def rebalancer-configs
[{:db/id (d/tempid :db.part/user)
:db/ident :rebalancer/config}
{:db/id (d/tempid :db.part/db)
:db/ident :rebalancer.config/min-utilization-threshold
:db/valueType :db.type/double
:db/cardinality :db.cardinality/one
:db.install/_attribute :db.part/db}
{:db/id (d/tempid :db.part/db)
:db/ident :rebalancer.config/safe-dru-threshold
:db/valueType :db.type/double
@@ -1135,8 +1135,7 @@
(deftest test-update-datomic-params-via-config!
(let [datomic-uri "datomic:mem://test-init-state"
conn (restore-fresh-database! datomic-uri)
all-params {:min-utilization-threshold 0.75
:safe-dru-threshold 1.0
all-params {:safe-dru-threshold 1.0
:min-dru-diff 0.5
:max-preemption 64.0}
updated-params {:min-dru-diff 0.75 :max-preemption 128.0}
Oops, something went wrong.

0 comments on commit 3278981

Please sign in to comment.