Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

EA-1110/CI-18: make some global variables configurable via config file

Their default values are not changed. We'll do the value tweaking in the coming checkins.

Signed-off-by: Zheng Li <zheng.li@eu.citrix.com>
  • Loading branch information...
commit a07e137dd29cde05742d0acc278b9358ccf77f1f 1 parent e220713
@zli zli authored Mike McClurg committed
View
14 ocaml/database/block_device_io.ml
@@ -691,7 +691,7 @@ let _ =
if !dump then begin
(* Open the block device *)
- let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. Xapi_globs.redo_log_max_startup_time) in
+ let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. !Xapi_globs.redo_log_max_startup_time) in
R.info "Opened block device.";
let target_response_time = Unix.gettimeofday() +. 3600. in
@@ -745,7 +745,7 @@ let _ =
if !empty then begin
(* Open the block device *)
- let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. Xapi_globs.redo_log_max_startup_time) in
+ let block_dev_fd = open_block_device !block_dev (Unix.gettimeofday() +. !Xapi_globs.redo_log_max_startup_time) in
R.info "Opened block device.";
let target_response_time = Unix.gettimeofday() +. 3600. in
@@ -762,7 +762,7 @@ let _ =
(* Main loop: accept a new client, communicate with it until it stops sending commands, repeat. *)
while true do
let start_of_startup = Unix.gettimeofday() in
- let target_startup_response_time = start_of_startup +. Xapi_globs.redo_log_max_startup_time in
+ let target_startup_response_time = start_of_startup +. !Xapi_globs.redo_log_max_startup_time in
R.debug "Awaiting incoming connections on %s..." !ctrlsock;
let client = accept_conn s target_startup_response_time in
@@ -788,10 +788,10 @@ let _ =
(* Note: none of the action functions throw any exceptions; they report errors directly to the client. *)
let (action_fn, block_time) = match str with
- | "writedelta" -> action_writedelta, Xapi_globs.redo_log_max_block_time_writedelta
- | "writedb___" -> action_writedb, Xapi_globs.redo_log_max_block_time_writedb
- | "read______" -> action_read, Xapi_globs.redo_log_max_block_time_read
- | "empty_____" -> action_empty, Xapi_globs.redo_log_max_block_time_empty
+ | "writedelta" -> action_writedelta, !Xapi_globs.redo_log_max_block_time_writedelta
+ | "writedb___" -> action_writedb, !Xapi_globs.redo_log_max_block_time_writedb
+ | "read______" -> action_read, !Xapi_globs.redo_log_max_block_time_read
+ | "empty_____" -> action_empty, !Xapi_globs.redo_log_max_block_time_empty
| _ -> (fun _ _ _ _ -> send_failure client (str^"|nack") ("Unknown command "^str)), 0.
in
(* "Start the clock!" -- set the latest time by which we need to have responded to the client. *)
View
11 ocaml/database/master_connection.ml
@@ -61,7 +61,6 @@ let with_timestamp f =
One common way this can happen is if we end up blocked waiting for a TCP timeout when the
master goes away unexpectedly... *)
let start_master_connection_watchdog() =
- let connection_reset_timeout = 2. *. 60. in
Thread.create
(fun () ->
while (true)
@@ -73,7 +72,7 @@ let start_master_connection_watchdog() =
| Some t ->
let now = Unix.gettimeofday() in
let since_last_call = now -. t in
- if since_last_call > connection_reset_timeout then
+ if since_last_call > !Xapi_globs.master_connection_reset_timeout then
begin
debug "Master connection timeout: forcibly resetting master connection";
force_connection_reset()
@@ -104,7 +103,7 @@ let open_secure_connection () =
(* Do a db xml_rpc request, catching exception and trying to reopen the connection if it
fails *)
exception Goto_handler
-let connection_timeout = ref 10. (* -ve means retry forever *)
+let connection_timeout = ref !Xapi_globs.master_connection_default_timeout (* -ve means retry forever. NOTE(review): the default is dereferenced once, when this top-level binding is evaluated -- presumably before Xapi_globs.read_external_config has run, so a value set in the config file may never reach this ref; confirm against the startup order *)
(* if this is true then xapi will restart if retries exceeded [and enter emergency mode if still
can't reconnect after reboot]. if this is false then xapi will just throw exception if retries
@@ -154,8 +153,8 @@ let do_db_xml_rpc_persistent_with_reopen ~host ~path (req: string) : string =
with
(* TODO: This http exception handler caused CA-36936 and can probably be removed now that there's backoff delay in the generic handler _ below *)
| Http_client.Http_error (http_code,err_msg) ->
- error "Received HTTP error %s (%s) from master. This suggests our master address is wrong. Sleeping for %.0fs and then restarting." http_code err_msg Xapi_globs.permanent_master_failure_retry_timeout;
- Thread.delay Xapi_globs.permanent_master_failure_retry_timeout;
+ error "Received HTTP error %s (%s) from master. This suggests our master address is wrong. Sleeping for %.0fs and then restarting." http_code err_msg !Xapi_globs.permanent_master_failure_retry_interval;
+ Thread.delay !Xapi_globs.permanent_master_failure_retry_interval;
exit Xapi_globs.restart_return_code
| e ->
begin
@@ -180,7 +179,7 @@ let do_db_xml_rpc_persistent_with_reopen ~host ~path (req: string) : string =
end
else
debug "Connection to master died: time taken so far in this call '%f'; will %s"
- time_sofar (if !connection_timeout < 0.
+ time_sofar (if !connection_timeout < 0.
then "never timeout"
else Printf.sprintf "timeout after '%f'" !connection_timeout);
if time_sofar > !connection_timeout && !connection_timeout >= 0. then
View
12 ocaml/database/redo_log.ml
@@ -230,7 +230,7 @@ let connect sockpath latest_response_time =
(* It's probably the case that the process hasn't started yet. *)
(* See if we can afford to wait and try again *)
Unix.close s;
- let attempt_delay = Xapi_globs.redo_log_connect_delay in
+ let attempt_delay = !Xapi_globs.redo_log_connect_delay in
let now = Unix.gettimeofday() in
let remaining = latest_response_time -. now in
if attempt_delay < remaining then begin
@@ -326,7 +326,7 @@ let rec read_read_response sock fn_db fn_delta expected_gen_count latest_respons
let action_empty sock datasockpath =
R.debug "Performing empty";
(* Compute desired response time *)
- let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_empty in
+ let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_empty in
(* Empty *)
let str = "empty_____" in
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -346,7 +346,7 @@ let action_empty sock datasockpath =
let action_read fn_db fn_delta sock datasockpath =
R.debug "Performing read";
(* Compute desired response time *)
- let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_read in
+ let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_read in
(* Write *)
let str = "read______" in
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -356,7 +356,7 @@ let action_read fn_db fn_delta sock datasockpath =
let action_write_db marker generation_count write_fn sock datasockpath =
R.debug "Performing writedb (generation %Ld)" generation_count;
(* Compute desired response time *)
- let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_writedb in
+ let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_writedb in
(* Send write command down control channel *)
let str = Printf.sprintf "writedb___|%s|%016Ld" marker generation_count in
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -413,7 +413,7 @@ let action_write_db marker generation_count write_fn sock datasockpath =
let action_write_delta marker generation_count data flush_db_fn sock datasockpath =
R.debug "Performing writedelta (generation %Ld)" generation_count;
(* Compute desired response time *)
- let latest_response_time = get_latest_response_time Xapi_globs.redo_log_max_block_time_writedelta in
+ let latest_response_time = get_latest_response_time !Xapi_globs.redo_log_max_block_time_writedelta in
(* Write *)
let str = Printf.sprintf "writedelta|%s|%016Ld|%016d|%s" marker generation_count (String.length data) data in
Unixext.time_limited_write sock (String.length str) str latest_response_time;
@@ -569,7 +569,7 @@ let startup log =
match !(log.sock) with
| Some _ -> () (* We're already connected *)
| None ->
- let latest_connect_time = get_latest_response_time Xapi_globs.redo_log_max_startup_time in
+ let latest_connect_time = get_latest_response_time !Xapi_globs.redo_log_max_startup_time in
(* Now connect to the process via the socket *)
let s = connect ctrlsockpath latest_connect_time in
View
10 ocaml/xapi/cli_operations.ml
@@ -899,7 +899,7 @@ let pool_join printer rpc session_id params =
~master_address:(List.assoc "master-address" params)
~master_username:(List.assoc "master-username" params)
~master_password:(List.assoc "master-password" params);
- printer (Cli_printer.PList ["Host agent will restart and attempt to join pool in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."])
+ printer (Cli_printer.PList ["Host agent will restart and attempt to join pool in "^(string_of_float !Xapi_globs.fuse_time)^" seconds..."])
with
| Api_errors.Server_error(code, params) when code=Api_errors.pool_joining_host_connection_failed ->
printer (Cli_printer.PList ["Host cannot contact destination host: connection refused.";
@@ -913,7 +913,7 @@ let pool_eject fd printer rpc session_id params =
let go () =
Client.Pool.eject ~rpc ~session_id ~host;
- printer (Cli_printer.PList ["Specified host will attempt to restart as a master of a new pool in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."]) in
+ printer (Cli_printer.PList ["Specified host will attempt to restart as a master of a new pool in "^(string_of_float !Xapi_globs.fuse_time)^" seconds..."]) in
if force
then go ()
@@ -969,11 +969,11 @@ let pool_eject fd printer rpc session_id params =
let pool_emergency_reset_master printer rpc session_id params =
let master_address = List.assoc "master-address" params in
Client.Pool.emergency_reset_master ~rpc ~session_id ~master_address;
- printer (Cli_printer.PList ["Host agent will restart and become slave of "^master_address^" in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."])
+ printer (Cli_printer.PList ["Host agent will restart and become slave of "^master_address^" in "^(Printf.sprintf "%g" !Xapi_globs.fuse_time)^" seconds..."]) (* fuse_time is now a float ref; "%g" renders 10. as "10", whereas string_of_float would render it as "10." *)
let pool_emergency_transition_to_master printer rpc session_id params =
Client.Pool.emergency_transition_to_master ~rpc ~session_id;
- printer (Cli_printer.PList ["Host agent will restart and transition to master in "^(string_of_int Xapi_globs.fuse_time)^" seconds..."])
+ printer (Cli_printer.PList ["Host agent will restart and transition to master in "^(Printf.sprintf "%g" !Xapi_globs.fuse_time)^" seconds..."]) (* fuse_time is now a float ref; "%g" renders 10. as "10", whereas string_of_float would render it as "10." *)
let pool_recover_slaves printer rpc session_id params =
let hosts = Client.Pool.recover_slaves ~rpc ~session_id in
@@ -3590,7 +3590,7 @@ let pool_restore_db fd printer rpc session_id params =
ignore(track_http_operation fd rpc session_id make_command "restore database");
if dry_run
then printer (Cli_printer.PList [ "Dry-run backup restore successful" ])
- else printer (Cli_printer.PList ["Host will reboot with restored database in "^(string_of_int Xapi_globs.db_restore_fuse_time)^" seconds..."])
+ else printer (Cli_printer.PList ["Host will reboot with restored database in "^(string_of_float !Xapi_globs.db_restore_fuse_time)^" seconds..."])
let pool_enable_external_auth printer rpc session_id params =
View
12 ocaml/xapi/db_gc.ml
@@ -194,7 +194,7 @@ let check_host_liveness ~__context =
let now = Unix.gettimeofday () in
(* we can now compare 'host_time' with 'now' *)
- if now -. host_time < Xapi_globs.host_assumed_dead_interval then begin
+ if now -. host_time < !Xapi_globs.host_assumed_dead_interval then begin
(* From the heartbeat PoV the host looks alive. We try to (i) minimise database sets; and (ii)
avoid toggling the host back to live if it has been marked as shutting_down. *)
Mutex.execute Xapi_globs.hosts_which_are_shutting_down_m
@@ -252,7 +252,7 @@ let timeout_sessions_common ~__context sessions =
(* Only keep a list of (ref, last_active, uuid) *)
let disposable_sessions = List.map (fun (x, y) -> x, Date.to_float y.Db_actions.session_last_active, y.Db_actions.session_uuid) disposable_sessions in
(* Definitely invalidate sessions last used long ago *)
- let threshold_time = Unix.time () -. Xapi_globs.inactive_session_timeout in
+ let threshold_time = Unix.time () -. !Xapi_globs.inactive_session_timeout in
let young, old = List.partition (fun (_, y, _) -> y > threshold_time) disposable_sessions in
(* If there are too many young sessions then we need to delete the oldest *)
let lucky, unlucky =
@@ -286,8 +286,8 @@ let timeout_sessions ~__context =
let timeout_tasks ~__context =
let all_tasks = Db.Task.get_internal_records_where ~__context ~expr:Db_filter_types.True in
- let oldest_completed_time = Unix.time() -. Xapi_globs.completed_task_timeout (* time out completed tasks after 65 minutes *) in
- let oldest_pending_time = Unix.time() -. Xapi_globs.pending_task_timeout (* time out pending tasks after 24 hours *) in
+ let oldest_completed_time = Unix.time() -. !Xapi_globs.completed_task_timeout (* time out completed tasks after 65 minutes *) in
+ let oldest_pending_time = Unix.time() -. !Xapi_globs.pending_task_timeout (* time out pending tasks after 24 hours *) in
let should_delete_task (_, t) =
if task_status_is_completed t.Db_actions.task_status
@@ -533,7 +533,7 @@ let start_heartbeat_thread() =
while(true) do
try
- Thread.delay Xapi_globs.host_heartbeat_interval;
+ Thread.delay !Xapi_globs.host_heartbeat_interval;
send_one_heartbeat ~__context rpc session_id
with
| (Api_errors.Server_error (x,y)) as e ->
@@ -549,6 +549,6 @@ let start_heartbeat_thread() =
exit Xapi_globs.restart_return_code
| e ->
debug "Caught %s - logging in again" (ExnHelper.string_of_exn e);
- Thread.delay Xapi_globs.host_heartbeat_interval;
+ Thread.delay !Xapi_globs.host_heartbeat_interval;
done
end)
View
6 ocaml/xapi/events.ml
@@ -125,7 +125,7 @@ module Domain_shutdown = struct
if Xapi_fist.disable_reboot_delay () then begin
debug "FIST: disable_reboot_delay";
0, 0
- end else if time_vm_ran_for ~__context ~vm < Xapi_globs.minimum_time_between_reboot_with_no_added_delay then begin
+ end else if time_vm_ran_for ~__context ~vm < !Xapi_globs.minimum_time_between_reboot_with_no_added_delay then begin
calculate_reboot_delay ~__context ~vm domid
end else 0, 0 in
if delay <> 0 then begin
@@ -189,9 +189,9 @@ module Domain_shutdown = struct
(* Perform bounce-suppression to prevent fast crash loops *)
let action =
let t = time_vm_ran_for ~__context ~vm in
- if t < Xapi_globs.minimum_time_between_bounces then begin
+ if t < !Xapi_globs.minimum_time_between_bounces then begin
let msg = Printf.sprintf "VM (%s) domid %d crashed too soon after start (ran for %f; minimum time %f)"
- (Db.VM.get_name_label ~__context ~self:vm) domid t Xapi_globs.minimum_time_between_bounces in
+ (Db.VM.get_name_label ~__context ~self:vm) domid t !Xapi_globs.minimum_time_between_bounces in
match action with
| `coredump_and_restart ->
debug "%s: converting coredump_and_restart -> coredump_and_destroy" msg;
View
18 ocaml/xapi/message_forwarding.ml
@@ -2682,17 +2682,15 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct
let (_: Thread.t) = Thread.create (fun () ->
Client.PIF.reconfigure_ip rpc session_id self mode iP netmask gateway dNS) () in
let task_id = Context.get_task_id __context in
- let rec poll i =
- if i>300 then failwith "Failed to see host on network after timeout expired";
+ let start_time = Unix.gettimeofday () in (* wall-clock start of the wait *)
+ let progress = ref 0.0 in
+ while !progress = 0.0 do (* keep polling until the task reports non-zero progress *)
+ if Unix.gettimeofday () -. start_time > !Xapi_globs.pif_reconfigure_ip_timeout then (* give up only once the configured timeout has elapsed; a "<" here would fail on the very first iteration *)
+ failwith "Failed to see host on network after timeout expired";
Thread.delay 1.0;
- debug "Polling task %s progress" (Ref.string_of task_id);
- let progress = Db.Task.get_progress ~__context ~self:task_id in
- debug "progress=%f" progress;
- if progress=0.0
- then poll (i+1)
- else ()
- in
- poll 0)
+ progress := Db.Task.get_progress ~__context ~self:task_id;
+ debug "Polling task %s progress" (Ref.string_of task_id)
+ done)
let scan ~__context ~host =
info "PIF.scan: host = '%s'" (host_uuid ~__context host);
View
10 ocaml/xapi/pool_db_backup.ml
@@ -23,8 +23,6 @@ open Db_cache_types
module D = Debug.Debugger(struct let name="pool_db_sync" end)
open D
-let pool_db_sync_timer = 60.0 *. 5. (* CA-16878: 5 minutes, same as the local database flush *)
-
let octet_stream = Http.Hdr.content_type ^": application/octet-stream"
(* CA-18377: The smallest database that is compatible with the Miami database schema. *)
@@ -203,8 +201,8 @@ let push_database_restore_handler (req: Http.Request.t) s _ =
(* now restart *)
debug "xapi has received new database via xml; will reboot and use that db...";
- info "Rebooting to use restored database after delay of: %d" Xapi_globs.db_restore_fuse_time;
- Xapi_fuse.light_fuse_and_reboot ~fuse_length:Xapi_globs.db_restore_fuse_time ();
+ info "Rebooting to use restored database after delay of: %f" !Xapi_globs.db_restore_fuse_time;
+ Xapi_fuse.light_fuse_and_reboot ~fuse_length:!Xapi_globs.db_restore_fuse_time ();
end
)
@@ -263,7 +261,7 @@ let pool_db_backup_thread () =
let generation = Db_lock.with_lock (fun () -> Manifest.generation (Database.manifest (Db_ref.get_database (Context.database_of __context)))) in
let dohost host =
try
- Thread.delay pool_db_sync_timer;
+ Thread.delay !Xapi_globs.pool_db_sync_interval;
debug "Starting DB synchronise with host %s" (Ref.string_of host);
Helpers.call_api_functions ~__context
(fun rpc session_id -> Client.Host.request_backup rpc session_id host generation false);
@@ -274,7 +272,7 @@ let pool_db_backup_thread () =
log_backtrace () in
(* since thread.delay is inside dohost fn make sure we don't spin if hosts=[]: *)
- if hosts=[] then Thread.delay pool_db_sync_timer
+ if hosts=[] then Thread.delay !Xapi_globs.pool_db_sync_interval
else List.iter dohost hosts;
end
with e -> debug "Exception in DB synchronise thread: %s" (ExnHelper.string_of_exn e)
View
10 ocaml/xapi/vmops.ml
@@ -832,9 +832,8 @@ let clean_shutdown_with_reason ?(at = fun _ -> ()) ~xal ~__context ~self ?(rel_t
(* Wait for up to 60s for the VM to acknowledge the shutdown request. In case the guest
misses our original request, keep making additional ones. *)
let finished = ref false in
- let timeout = 60.0 in
let start = Unix.gettimeofday () in
- while Unix.gettimeofday () -. start < timeout && not !finished do
+ while Unix.gettimeofday () -. start < !Xapi_globs.domain_shutdown_ack_timeout && not !finished do
try
(* Make the shutdown request: this will fail if the domain has vanished. *)
Domain.shutdown ~xs domid reason;
@@ -852,12 +851,11 @@ let clean_shutdown_with_reason ?(at = fun _ -> ()) ~xal ~__context ~self ?(rel_t
if not !finished then raise (Api_errors.Server_error (Api_errors.vm_failed_shutdown_ack, []))
end;
at 0.50;
- let total_timeout = 60. *. 60. in (* 1 hour *)
(* Block for 5s at a time, in between check to see whether we've been cancelled
and update our progress if not *)
let start = Unix.gettimeofday () in
let result = ref None in
- while (Unix.gettimeofday () -. start < total_timeout) && (!result = None) do
+ while (Unix.gettimeofday () -. start < !Xapi_globs.domain_shutdown_total_timeout) && (!result = None) do
try
debug "MTC: calling xal.wait_release timeout=%f" rel_timeout;
Xs.monitor_paths xs [ "@releaseDomain","X" ] rel_timeout
@@ -868,11 +866,11 @@ let clean_shutdown_with_reason ?(at = fun _ -> ()) ~xal ~__context ~self ?(rel_t
if reason <> Domain.Suspend && TaskHelper.is_cancelling ~__context
then raise (Api_errors.Server_error(Api_errors.task_cancelled, [ Ref.string_of (Context.get_task_id __context) ]));
(* Update progress and repeat *)
- let progress = min ((Unix.gettimeofday () -. start) /. total_timeout) 1. in
+ let progress = min ((Unix.gettimeofday () -. start) /. !Xapi_globs.domain_shutdown_total_timeout) 1. in
at (0.50 +. 0.25 *. progress)
done;
match !result with
- | None -> raise (Api_errors.Server_error(Api_errors.vm_shutdown_timeout, [ Ref.string_of self; string_of_float total_timeout ]))
+ | None -> raise (Api_errors.Server_error(Api_errors.vm_shutdown_timeout, [ Ref.string_of self; string_of_float !Xapi_globs.domain_shutdown_total_timeout ]))
| Some x ->
at 1.0;
x
View
19 ocaml/xapi/xapi.ml
@@ -496,11 +496,11 @@ let server_run_in_emergency_mode () =
Xapi_globs.slave_emergency_mode := true;
(* signal the init script that it should succeed even though we're bust *)
Helpers.touch_file !Xapi_globs.ready_file;
-
- let emergency_reboot_timer = 60. +. (float_of_int (Random.int 120)) (* restart after 1--3 minute delay *) in
- info "Will restart management software in %.1f seconds" emergency_reboot_timer;
+
+ let emergency_reboot_delay = !Xapi_globs.emergency_reboot_delay_base +. Random.float !Xapi_globs.emergency_reboot_delay_extra in
+ info "Will restart management software in %.1f seconds" emergency_reboot_delay;
(* in emergency mode we reboot to try reconnecting every "emergency_reboot_timer" period *)
- let (* reboot_thread *) _ = Thread.create (fun ()->Thread.delay emergency_reboot_timer; exit Xapi_globs.restart_return_code) () in
+ let (* reboot_thread *) _ = Thread.create (fun ()->Thread.delay emergency_reboot_delay; exit Xapi_globs.restart_return_code) () in
wait_to_die();
exit 0
@@ -824,7 +824,8 @@ let server_init() =
Server_helpers.exec_with_new_task "server_init" (fun __context ->
Startup.run ~__context [
"Reading config file", [], (fun () -> Xapi_config.read_config !Xapi_globs.config_file);
- "Reading log config file", [ Startup.NoExnRaising ], (fun () -> Xapi_config.read_log_config !Xapi_globs.log_config_file);
+ "Reading log config file", [ Startup.NoExnRaising ], (fun () ->Xapi_config.read_log_config !Xapi_globs.log_config_file);
+ "Reading external global variables definition", [ Startup.NoExnRaising ], Xapi_globs.read_external_config;
"Initing stunnel path", [], Stunnel.init_stunnel_path;
"XAPI SERVER STARTING", [], print_server_starting_message;
"Parsing inventory file", [], Xapi_inventory.read_inventory;
@@ -891,14 +892,14 @@ let server_init() =
debug "I think the error is a temporary one, retrying in 5s";
Thread.delay 5.;
| Some Permanent ->
- error "Permanent error in Pool.hello, will retry after %.0fs just in case" Xapi_globs.permanent_master_failure_retry_timeout;
- Thread.delay Xapi_globs.permanent_master_failure_retry_timeout
+ error "Permanent error in Pool.hello, will retry after %.0fs just in case" !Xapi_globs.permanent_master_failure_retry_interval;
+ Thread.delay !Xapi_globs.permanent_master_failure_retry_interval
end;
done;
debug "Startup successful";
Xapi_globs.slave_emergency_mode := false;
Master_connection.connection_timeout := initial_connection_timeout;
-
+
begin
try
(* We can't tolerate an exception in db synchronization so fall back into emergency mode
@@ -914,7 +915,7 @@ let server_init() =
server_run_in_emergency_mode()
end
end;
- Master_connection.connection_timeout := Xapi_globs.master_connect_retry_timeout;
+ Master_connection.connection_timeout := !Xapi_globs.master_connection_retry_timeout;
Master_connection.restart_on_connection_timeout := true;
Master_connection.on_database_connection_established := (fun () -> on_master_restart ~__context);
end;
View
14 ocaml/xapi/xapi_fuse.ml
@@ -25,10 +25,10 @@ let time f =
Unix.gettimeofday () -. start
(* give xapi time to reply to API messages by means of a 10 second fuse! *)
-let light_fuse_and_run ?(fuse_length=Xapi_globs.fuse_time) () =
+let light_fuse_and_run ?(fuse_length = !Xapi_globs.fuse_time) () =
debug "light_fuse_and_run: calling Monitor_rrds.backup to save current RRDs locally";
let delay_so_far = time Monitor_rrds.backup in
- let new_fuse_length = max 5. (float_of_int fuse_length -. delay_so_far) in
+ let new_fuse_length = max 5. (fuse_length -. delay_so_far) in
debug "light_fuse_and_run: current RRDs have been saved";
ignore (Thread.create
(fun ()->
@@ -49,25 +49,25 @@ let light_fuse_and_run ?(fuse_length=Xapi_globs.fuse_time) () =
let light_fuse_and_reboot_after_eject() =
ignore (Thread.create
(fun ()->
- Thread.delay (float_of_int Xapi_globs.fuse_time);
+ Thread.delay !Xapi_globs.fuse_time;
(* this activates firstboot script and reboots the host *)
ignore (Forkhelpers.execute_command_get_output "/sbin/service" [ "firstboot"; "activate" ]);
()
) ())
-let light_fuse_and_reboot ?(fuse_length=Xapi_globs.fuse_time) () =
+let light_fuse_and_reboot ?(fuse_length = !Xapi_globs.fuse_time) () =
ignore (Thread.create
(fun ()->
- Thread.delay (float_of_int fuse_length);
+ Thread.delay fuse_length;
ignore(Sys.command "shutdown -r now")
) ())
-let light_fuse_and_dont_restart ?(fuse_length=Xapi_globs.fuse_time) () =
+let light_fuse_and_dont_restart ?(fuse_length = !Xapi_globs.fuse_time) () =
ignore (Thread.create
(fun () ->
debug "light_fuse_and_dont_restart: calling Monitor_rrds.backup to save current RRDs locally";
Monitor_rrds.backup ();
- Thread.delay (float_of_int fuse_length);
+ Thread.delay fuse_length;
Db_cache_impl.flush_and_exit (Db_connections.preferred_write_db ()) 0) ());
(* This is a best-effort attempt to use the database. We must not block the flush_and_exit above, hence
the use of a background thread. *)
View
299 ocaml/xapi/xapi_globs.ml
@@ -11,12 +11,13 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)
-
+
(** A central location for settings related to xapi *)
-
+
open Stringext
open Printf
-open Util_globs_inventory
+
+module D = Debug.Debugger(struct let name="xapi_globs" end)
(* xapi process returns this code on exit when it wants to be restarted *)
let restart_return_code = 123
@@ -67,14 +68,6 @@ let local_storage_unix_domain_socket = "/var/xapi/storage-local"
let storage_unix_domain_socket = "/var/xapi/storage"
let local_database = "/var/xapi/local.db"
-(* amount of time to retry master_connection before (if restart_on_connection_timeout is set) restarting xapi; -ve means don't timeout: *)
-let master_connect_retry_timeout = -1. (* never timeout *)
-
-(* the time taken to wait before restarting in a different mode for pool eject/join operations *)
-let fuse_time = 10
-
-(* the time taken to wait before restarting after restoring db backup *)
-let db_restore_fuse_time = 30
(* if a slave in emergency "cannot see master mode" then this flag is set *)
let slave_emergency_mode = ref false
@@ -83,11 +76,6 @@ let slave_emergency_mode = ref false
without trawling through logfiles *)
let emergency_mode_error = ref (Api_errors.Server_error(Api_errors.host_still_booting, []))
-(* Interval between host heartbeats *)
-let host_heartbeat_interval = 30.0
-(* If we haven't heard a heartbeat from a host for this interval then the host is assumed dead *)
-let host_assumed_dead_interval = 600.0 (* 10 minutes *)
-
let http_realm = "xapi"
(* Special XS entry looked for by the XenSource PV drivers (see xenagentd.hg:src/xad.c) *)
@@ -232,9 +220,6 @@ let migration_failure_test_key = "migration_wings_fall_off" (* set in other-conf
the disk flushing *)
let migration_extra_paths_key = "migration_extra_paths"
-(* If a session has a last_active older than this we delete it *)
-let inactive_session_timeout = 24. *. 60. *. 60. (* 24 hrs in seconds *)
-
(* After this we start to delete completed tasks (never pending ones) *)
let max_tasks = 200
@@ -242,21 +227,6 @@ let max_tasks = 200
(* We must allow for more sessions than running tasks *)
let max_sessions = max_tasks * 2
-let completed_task_timeout = 65. *. 60. (* 65 mins *)
-
-let pending_task_timeout = 24. *. 60. *. 60. (* 24 hrs in seconds *)
-
-(* After this we start to delete alerts *)
-let alert_timeout = completed_task_timeout +. 1.
-
-(* Don't reboot a domain which crashes too quickly: *)
-let minimum_time_between_bounces = 120. (* 2 minutes *)
-
-(* If a domain is rebooted (from inside) in less than this time since it last started, then insert an artificial delay: *)
-let minimum_time_between_reboot_with_no_added_delay = 60. (* 1 minute *)
-(* the size of the artificial delay is: *)
-let artificial_reboot_delay = 30.
-
(* The Unix.time that represents the maximum time in the future that a 32 bit time can cope with *)
let the_future = 2147483647.0
@@ -282,23 +252,11 @@ let pool_ha_num_host_failures = "ha_tolerated_host_failures"
(* the other-config key that reflects whether the pool is overprovisioned *)
let pool_ha_currently_over_provisioned = "ha_currently_over_provisioned"
-let ha_monitor_timer = 20. (* seconds *)
-
-let ha_monitor_startup_timeout = 30. *. 60. (* seconds *)
-
-(* Unconditionally replan every once in a while just in case the overcommit protection is buggy and we don't notice *)
-let ha_monitor_plan_timer = 30. *. 60. (* seconds *)
-
let backup_db = "/var/xapi/state-backup.db"
(* Place where database XML backups are kept *)
let backup_db_xml = "/var/xapi/state-backup.xml"
-(* Time to wait before fencing in the case when this host isn't a master, isn't in
- emergency mode and has no running VMs before fencing. This is intended to give
- the admin some time to fix a broken configuration.*)
-let noncritical_fence_timeout = 5. *. 60. (* 5 minutes *)
-
(* Directory containing scripts which are executed when a node becomes master
and when a node gives up the master role *)
let master_scripts_dir = "/etc/xensource/master.d"
@@ -418,8 +376,6 @@ let http_limit_max_rpc_size = 300 * 1024 (* 300K *)
let http_limit_max_cli_size = 200 * 1024 (* 200K *)
let http_limit_max_rrd_size = 2 * 1024 * 1024 (* 2M -- FIXME : need to go below 1mb for security purpose. *)
-let sync_timer = 3600.0 *. 24.0 (* sync once a day *)
-
let message_limit=10000
let xapi_message_script = base_path ^ "/libexec/mail-alarm"
@@ -430,7 +386,7 @@ let max_clock_skew = 5. *. 60. (* 5 minutes *)
(* Optional directory containing XenAPI plugins *)
let xapi_plugins_root = "/etc/xapi.d/plugins"
-let guest_liveness_timeout = 5.0 *. 60.0
+
(** CA-18377: Providing lists of operations that were supported by the Miami release. *)
(** For now, we check against these lists when sending data across the wire that may *)
@@ -516,24 +472,6 @@ let memory_ratio_pv = ("memory-ratio-pv", "0.25")
(** The maximum allowed number of redo_log instances. *)
let redo_log_max_instances = 8
-(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while emptying *)
-let redo_log_max_block_time_empty = 2.
-
-(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while reading *)
-let redo_log_max_block_time_read = 30.
-
-(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while writing a delta *)
-let redo_log_max_block_time_writedelta = 2.
-
-(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while writing a database *)
-let redo_log_max_block_time_writedb = 30.
-
-(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while initially connecting to it *)
-let redo_log_max_startup_time = 5.
-
-(** The delay between each attempt to connect to the block device I/O process *)
-let redo_log_connect_delay = 0.1
-
(** The prefix of the file used as a socket to communicate with the block device I/O process *)
let redo_log_comms_socket_stem = "sock-blkdev-io"
@@ -578,10 +516,6 @@ let event_hook_auth_on_xapi_initialize_succeeded = ref false
(** Directory used by the v6 license policy engine for caching *)
let upgrade_grace_file = "/var/xapi/ugp"
-
-(** Time after which we conclude that a VM really is unco-operative *)
-let cooperative_timeout = 30.
-
(** Where the ballooning daemon writes the initial overhead value *)
let squeezed_reserved_host_memory = "/squeezed/reserved-host-memory"
@@ -617,7 +551,7 @@ let old_dell_bios_strings =
"oem-2", "5[0000]";
"oem-3", "MS_VM_CERT/SHA1/bdbeb6e0a816d43fa6d3fe8aaef04c2bad9d3e3d";
"hp-rombios", ""]
-
+
(** BIOS strings of the old (XS 5.5) HP Edition *)
let old_hp_bios_strings =
["bios-vendor", "Xen";
@@ -630,9 +564,6 @@ let old_hp_bios_strings =
"oem-2", "MS_VM_CERT/SHA1/bdbeb6e0a816d43fa6d3fe8aaef04c2bad9d3e3d";
"hp-rombios", "COMPAQ"]
-
-let permanent_master_failure_retry_timeout = 1. *. 60. (* 1 minute *)
-
(** {2 CPUID feature masking} *)
(** Pool.other_config key to hold the user-defined feature mask, used to
@@ -647,3 +578,221 @@ let network_reset_trigger = "/tmp/network-reset"
let first_boot_dir = "/etc/firstboot.d/"
+
+(** Dynamic configurations to be read whenever xapi (re)starts *)
+
+let master_connection_reset_timeout = ref 120.
+
+(* Amount of time to keep retrying master_connection before (if
+ restart_on_connection_timeout is set) restarting xapi; a negative value
+ means never time out: *)
+let master_connection_retry_timeout = ref (-1.)
+
+let master_connection_default_timeout = ref 10.
+
+let qemu_dm_ready_timeout = ref 1200.
+
+(* seconds per balancing check *)
+let squeezed_balance_check_interval = ref 10.
+
+(* Time we allow for the hotplug scripts to run before we assume something bad
+ has happened and abort *)
+let hotplug_timeout = ref 1200.
+
+let pif_reconfigure_ip_timeout = ref 300.
+
+(* CA-16878: 5 minutes, same as the local database flush *)
+let pool_db_sync_interval = ref 300.
+(* blob/message/rrd file syncing - sync once a day *)
+let pool_data_sync_interval = ref 86400.
+
+let domain_shutdown_ack_timeout = ref 10.
+let domain_shutdown_total_timeout = ref 3600.
+
+(* The actual reboot delay will be a random value between base and base + extra *)
+let emergency_reboot_delay_base = ref 60.
+let emergency_reboot_delay_extra = ref 120.
+
+let ha_xapi_healthcheck_interval = ref 60
+let ha_xapi_healthcheck_timeout = ref 120 (* > the number of attempts in xapi-health-check script *)
+let ha_xapi_restart_attempts = ref 1
+let ha_xapi_restart_timeout = ref 300 (* 180s is max start delay and 60s max shutdown delay in the initscript *)
+
+(* Logrotate - poll the amount of data written out by the logger, and call
+ logrotate when it exceeds the threshold *)
+let logrotate_check_interval = ref 300.
+
+let rrd_backup_interval = ref 86400.
+
+(* CP-703: Periodic revalidation of externally-authenticated sessions *)
+let session_revalidation_interval = ref 300. (* every 5 minutes *)
+
+(* CP-820: other-config field in subjects should be periodically refreshed *)
+let update_all_subjects_interval = ref 900. (* every 15 minutes *)
+
+(* The default upper bound on the length of time, in seconds, to wait for a
+ running VM to reach its current memory target. *)
+let wait_memory_target_timeout = ref 256.
+
+let snapshot_with_quiesce_timeout = ref 300.
+
+(* Interval between host heartbeats *)
+let host_heartbeat_interval = ref 30.
+
+(* If we haven't heard a heartbeat from a host for this interval then the host is assumed dead *)
+let host_assumed_dead_interval = ref 600.0
+
+(* the time taken to wait before restarting in a different mode for pool eject/join operations *)
+let fuse_time = ref 10.
+
+(* the time taken to wait before restarting after restoring db backup *)
+let db_restore_fuse_time = ref 30.
+
+(* If a session has a last_active older than this we delete it *)
+let inactive_session_timeout = ref 86400. (* 24 hrs in seconds *)
+
+let pending_task_timeout = ref 86400. (* 24 hrs in seconds *)
+
+let completed_task_timeout = ref 3900. (* 65 mins *)
+
+(* Don't reboot a domain which crashes too quickly: *)
+let minimum_time_between_bounces = ref 120. (* 2 minutes *)
+
+(* If a domain is rebooted (from inside) in less than this time since it last
+ started, then insert an artificial delay: *)
+let minimum_time_between_reboot_with_no_added_delay = ref 60. (* 1 minute *)
+
+let ha_monitor_interval = ref 20.
+(* Unconditionally replan every once in a while just in case the overcommit
+ protection is buggy and we don't notice *)
+let ha_monitor_plan_interval = ref 1800.
+
+let ha_monitor_startup_timeout = ref 1800.
+
+let ha_default_timeout_base = ref 60.
+
+let guest_liveness_timeout = ref 300.
+
+let permanent_master_failure_retry_interval = ref 60.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while emptying *)
+let redo_log_max_block_time_empty = ref 2.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while reading *)
+let redo_log_max_block_time_read = ref 30.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while writing a delta *)
+let redo_log_max_block_time_writedelta = ref 2.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while writing a database *)
+let redo_log_max_block_time_writedb = ref 30.
+
+(** The maximum time, in seconds, for which we are prepared to wait for a response from the block device I/O process before assuming that it has died while initially connecting to it *)
+let redo_log_max_startup_time = ref 5.
+
+(** The delay between each attempt to connect to the block device I/O process *)
+let redo_log_connect_delay = ref 0.1
+
+let xapi_globs_spec = (* Association list handed to Config.read by
+ read_external_config. Each entry pairs a configuration-file key, spelled
+ exactly like the variable it controls, with a Config.Set_float or
+ Config.Set_int setter wrapping the ref of the same name defined above
+ (presumably Config.read stores the parsed value into that ref; confirm in
+ the Config module). Only the four entries whose key starts with ha_xapi_
+ are ints; every other entry is a float. A new tunable must be added both
+ as a ref and as an entry here, otherwise its key is treated as unknown. *)
+ [ "master_connection_reset_timeout",
+ Config.Set_float master_connection_reset_timeout;
+ "master_connection_retry_timeout",
+ Config.Set_float master_connection_retry_timeout;
+ "master_connection_default_timeout",
+ Config.Set_float master_connection_default_timeout;
+ "qemu_dm_ready_timeout",
+ Config.Set_float qemu_dm_ready_timeout;
+ "squeezed_balance_check_interval",
+ Config.Set_float squeezed_balance_check_interval;
+ "hotplug_timeout",
+ Config.Set_float hotplug_timeout;
+ "pif_reconfigure_ip_timeout",
+ Config.Set_float pif_reconfigure_ip_timeout;
+ "pool_db_sync_interval",
+ Config.Set_float pool_db_sync_interval;
+ "pool_data_sync_interval",
+ Config.Set_float pool_data_sync_interval;
+ "domain_shutdown_ack_timeout",
+ Config.Set_float domain_shutdown_ack_timeout;
+ "domain_shutdown_total_timeout",
+ Config.Set_float domain_shutdown_total_timeout;
+ "emergency_reboot_delay_base",
+ Config.Set_float emergency_reboot_delay_base;
+ "emergency_reboot_delay_extra",
+ Config.Set_float emergency_reboot_delay_extra;
+ "ha_xapi_healthcheck_interval",
+ Config.Set_int ha_xapi_healthcheck_interval;
+ "ha_xapi_healthcheck_timeout",
+ Config.Set_int ha_xapi_healthcheck_timeout;
+ "ha_xapi_restart_attempts",
+ Config.Set_int ha_xapi_restart_attempts;
+ "ha_xapi_restart_timeout",
+ Config.Set_int ha_xapi_restart_timeout;
+ "logrotate_check_interval",
+ Config.Set_float logrotate_check_interval;
+ "rrd_backup_interval",
+ Config.Set_float rrd_backup_interval;
+ "session_revalidation_interval",
+ Config.Set_float session_revalidation_interval;
+ "update_all_subjects_interval",
+ Config.Set_float update_all_subjects_interval;
+ "wait_memory_target_timeout",
+ Config.Set_float wait_memory_target_timeout;
+ "snapshot_with_quiesce_timeout",
+ Config.Set_float snapshot_with_quiesce_timeout;
+ "host_heartbeat_interval",
+ Config.Set_float host_heartbeat_interval;
+ "host_assumed_dead_interval",
+ Config.Set_float host_assumed_dead_interval;
+ "fuse_time",
+ Config.Set_float fuse_time;
+ "db_restore_fuse_time",
+ Config.Set_float db_restore_fuse_time;
+ "inactive_session_timeout",
+ Config.Set_float inactive_session_timeout;
+ "pending_task_timeout",
+ Config.Set_float pending_task_timeout;
+ "completed_task_timeout",
+ Config.Set_float completed_task_timeout;
+ "minimum_time_between_bounces",
+ Config.Set_float minimum_time_between_bounces;
+ "minimum_time_between_reboot_with_no_added_delay",
+ Config.Set_float minimum_time_between_reboot_with_no_added_delay;
+ "ha_monitor_interval",
+ Config.Set_float ha_monitor_interval;
+ "ha_monitor_plan_interval",
+ Config.Set_float ha_monitor_plan_interval;
+ "ha_monitor_startup_timeout",
+ Config.Set_float ha_monitor_startup_timeout;
+ "ha_default_timeout_base",
+ Config.Set_float ha_default_timeout_base;
+ "guest_liveness_timeout",
+ Config.Set_float guest_liveness_timeout;
+ "permanent_master_failure_retry_interval",
+ Config.Set_float permanent_master_failure_retry_interval;
+ "redo_log_max_block_time_empty",
+ Config.Set_float redo_log_max_block_time_empty;
+ "redo_log_max_block_time_read",
+ Config.Set_float redo_log_max_block_time_read;
+ "redo_log_max_block_time_writedelta",
+ Config.Set_float redo_log_max_block_time_writedelta;
+ "redo_log_max_block_time_writedb",
+ Config.Set_float redo_log_max_block_time_writedb;
+ "redo_log_max_startup_time",
+ Config.Set_float redo_log_max_startup_time;
+ "redo_log_connect_delay",
+ Config.Set_float redo_log_connect_delay;
+ ]
+
+let xapi_globs_conf = "/etc/xensource/xapi_globs.conf"
+
+let read_external_config () =
+ let unknown_key k v = D.warn "Unknown key/value pairs: (%s, %s)" k v in
+ if Sys.file_exists xapi_globs_conf then begin
+ (* Only attempted when xapi_globs_conf exists; when the file is absent this
+ function does nothing, so every ref keeps the default it was created
+ with. unknown_key is passed to Config.read, presumably as the handler
+ for keys missing from xapi_globs_spec (confirm in Config); it only logs
+ a warning.
+ Config.read will raise an exception if xapi_globs.conf is mis-formatted.
+ It is up to the caller to inspect and handle the failure.
+ *)
+ Config.read xapi_globs_conf xapi_globs_spec unknown_key;
+ D.info "Read global variables successfully from %s" xapi_globs_conf
+ end
+
View
6 ocaml/xapi/xapi_guest_agent.ml
@@ -242,7 +242,7 @@ let guest_metrics_liveness_thread () =
(fun __context ->
while true do
try
- Thread.delay Xapi_globs.guest_liveness_timeout;
+ Thread.delay !Xapi_globs.guest_liveness_timeout;
let doms = Xenctrl.domain_getinfolist xc 1 in (* no guest agent in dom0 *)
let now = Unix.gettimeofday () in
(* debug "Running liveness logic"; *)
@@ -256,7 +256,7 @@ let guest_metrics_liveness_thread () =
begin
(* debug "Domain %d thought to be dead" domid; *)
(* If it's marked as dead, check if we've received any update recently *)
- if now -. last_updated < Xapi_globs.guest_liveness_timeout then
+ if now -. last_updated < !Xapi_globs.guest_liveness_timeout then
begin
(* debug "Marking as alive!"; *)
(* Mark guest as alive! *)
@@ -272,7 +272,7 @@ let guest_metrics_liveness_thread () =
begin
(* debug "Domain %d thought to be live" domid; *)
(* If it's marked as alive, check if we've received any update recently *)
- if now -. last_updated > Xapi_globs.guest_liveness_timeout then
+ if now -. last_updated > !Xapi_globs.guest_liveness_timeout then
begin
(* debug "Marking as dead!"; *)
(* Mark guest as dead! *)
View
24 ocaml/xapi/xapi_ha.ml
@@ -214,10 +214,10 @@ module Timeouts = struct
boot_join_timeout = boot_join_timeout;
enable_join_timeout = enable_join_timeout;
- xapi_healthcheck_interval = 60;
- xapi_healthcheck_timeout = 120; (* > the number of attempts in xapi-health-check script *)
- xapi_restart_attempts = 1;
- xapi_restart_timeout = 300; (* 180s is max start delay and 60s max shutdown delay in the initscript *)
+ xapi_healthcheck_interval = !Xapi_globs.ha_xapi_healthcheck_interval;
+ xapi_healthcheck_timeout = !Xapi_globs.ha_xapi_healthcheck_timeout;
+ xapi_restart_attempts = !Xapi_globs.ha_xapi_restart_attempts;
+ xapi_restart_timeout = !Xapi_globs.ha_xapi_restart_timeout; (* 180s is max start delay and 60s max shutdown delay in the initscript *)
}
(** Returns the base timeout value from which the rest are derived *)
@@ -234,7 +234,7 @@ module Timeouts = struct
else
if List.mem_assoc Xapi_globs.default_ha_timeout other_config
then int_of_string (List.assoc Xapi_globs.default_ha_timeout other_config)
- else 60 in
+ else int_of_float !Xapi_globs.ha_default_timeout_base in
t
end
@@ -520,7 +520,7 @@ module Monitor = struct
end;
let now = Unix.gettimeofday () in
- let plan_too_old = now -. !last_plan_time > Xapi_globs.ha_monitor_plan_timer in
+ let plan_too_old = now -. !last_plan_time > !Xapi_globs.ha_monitor_plan_interval in
if plan_too_old || !plan_out_of_date then begin
let changed = Xapi_ha_vm_failover.update_pool_status ~__context in
@@ -541,12 +541,12 @@ module Monitor = struct
Condition.wait database_state_valid_c thread_m
done);
- info "Master HA startup waiting for up to %.2f for slaves in the liveset to report in and enable themselves" Xapi_globs.ha_monitor_startup_timeout;
+ info "Master HA startup waiting for up to %.2f for slaves in the liveset to report in and enable themselves" !Xapi_globs.ha_monitor_startup_timeout;
let start = Unix.gettimeofday () in
let finished = ref false in
while Mutex.execute m (fun () -> not(!request_shutdown)) && not(!finished) do
try
- ignore(Delay.wait delay Xapi_globs.ha_monitor_timer);
+ ignore(Delay.wait delay !Xapi_globs.ha_monitor_interval);
if Mutex.execute m (fun () -> not(!request_shutdown)) then begin
let liveset = query_liveset_on_all_hosts () in
let uuids = List.map Uuid.string_of_uuid (uuids_of_liveset liveset) in
@@ -559,7 +559,7 @@ module Monitor = struct
finished := true;
end;
- if Unix.gettimeofday () -. start > Xapi_globs.ha_monitor_startup_timeout && disabled <> [] then begin
+ if Unix.gettimeofday () -. start > !Xapi_globs.ha_monitor_startup_timeout && disabled <> [] then begin
info "Master HA startup: Timed out waiting for all live slaves to enable themselves (have some hosts failed to attach storage?) Live but disabled hosts: [ %s ]"
(String.concat "; " (List.map fst disabled));
finished := true
@@ -567,7 +567,7 @@ module Monitor = struct
end;
with e ->
debug "Exception in HA monitor thread while waiting for slaves: %s" (ExnHelper.string_of_exn e);
- Thread.delay Xapi_globs.ha_monitor_timer
+ Thread.delay !Xapi_globs.ha_monitor_interval
done in
(* If we're the master we must wait for our live slaves to turn up before we consider restarting VMs etc *)
@@ -576,7 +576,7 @@ module Monitor = struct
(* Monitoring phase: we must assume the worst and not touch the database here *)
while Mutex.execute m (fun () -> not(!request_shutdown)) do
try
- ignore(Delay.wait delay Xapi_globs.ha_monitor_timer);
+ ignore(Delay.wait delay !Xapi_globs.ha_monitor_interval);
if Mutex.execute m (fun () -> not(!request_shutdown)) then begin
let liveset = query_liveset_on_all_hosts () in
@@ -611,7 +611,7 @@ module Monitor = struct
end
with e ->
debug "Exception in HA monitor thread: %s" (ExnHelper.string_of_exn e);
- Thread.delay Xapi_globs.ha_monitor_timer
+ Thread.delay !Xapi_globs.ha_monitor_interval
done;
debug "Re-enabling old Host_metrics.live heartbeat";
View
2  ocaml/xapi/xapi_host.ml
@@ -466,7 +466,7 @@ let restart_agent ~__context ~host =
let shutdown_agent ~__context =
debug "Host.restart_agent: Host agent will shutdown in 1s!!!!";
- Xapi_fuse.light_fuse_and_dont_restart ~fuse_length:1 ()
+ Xapi_fuse.light_fuse_and_dont_restart ~fuse_length:1. ()
let disable ~__context ~host =
if Db.Host.get_enabled ~__context ~self:host then begin
View
21 ocaml/xapi/xapi_periodic_scheduler_init.ml
@@ -22,19 +22,16 @@ let register () =
debug "Registering periodic calls";
let master = Pool_role.is_master () in
-
+
(* blob/message/rrd file syncing - sync once a day *)
let sync_timer =
- if Xapi_fist.reduce_blob_sync_interval then 60.0 *. 5.0 else 60.0 *. 60.0 *. 24.0 in
+ if Xapi_fist.reduce_blob_sync_interval then 60.0 *. 5.0 else !Xapi_globs.pool_data_sync_interval in
let sync_func () =
Xapi_sync.do_sync () in
let sync_delay =
(* 10 mins if fist point there - to ensure rrd sync happens first *)
- if Xapi_fist.reduce_blob_sync_interval then 60.0 *. 10.0 else 7200.0 in
+ if Xapi_fist.reduce_blob_sync_interval then 60.0 *. 10.0 else 7200. in
- (* Logrotate - poll the amount of data written out by the logger, *)
- (* and call logrotate when it exceeds the threshold *)
- let logrotate_timer = 60.0 *. 5.0 in
let logrotate_func () =
let dorotate = Mutex.execute Log.mutex
(fun () ->
@@ -60,7 +57,7 @@ let register () =
(* Periodic backup of RRDs *)
let rrdbackup_timer =
- if Xapi_fist.reduce_rrd_backup_interval then 60.0 *. 5.0 else 3600.0 *. 24.0 in
+ if Xapi_fist.reduce_rrd_backup_interval then 60.0 *. 5.0 else !Xapi_globs.rrd_backup_interval in
let rrdbackup_func () =
Server_helpers.exec_with_new_task "rrdbackup_func"
(fun __context ->
@@ -72,15 +69,11 @@ let register () =
let rrdbackup_delay =
if Xapi_fist.reduce_rrd_backup_interval then 60.0 *. 6.0 else 3600.0 in
- (* CP-703: Periodic revalidation of externally-authenticated sessions *)
- let session_revalidation_timer = 60.0 *. 5.0 in (* every 5 minutes *)
let session_revalidation_func () =
Server_helpers.exec_with_new_task "session_revalidation_func"
(fun __context -> Xapi_session.revalidate_all_sessions ~__context) in
let session_revalidation_delay = 60.0 *. 5.0 in (* initial delay = 5 minutes *)
- (* CP-820: other-config field in subjects should be periodically refreshed *)
- let update_all_subjects_timer = 60.0 *. 15.0 in (* every 15 minutes *)
let update_all_subjects_func () =
Server_helpers.exec_with_new_task "update_all_subjects_func"
(fun __context -> Xapi_subject.update_all_subjects ~__context) in
@@ -89,10 +82,10 @@ let register () =
if master then Xapi_periodic_scheduler.add_to_queue "Synchronising RRDs/messages" (Xapi_periodic_scheduler.Periodic sync_timer) sync_delay sync_func;
if master then Xapi_periodic_scheduler.add_to_queue "Backing up RRDs" (Xapi_periodic_scheduler.Periodic rrdbackup_timer) rrdbackup_delay rrdbackup_func;
if master then Xapi_periodic_scheduler.add_to_queue "Revalidating externally-authenticated sessions"
- (Xapi_periodic_scheduler.Periodic session_revalidation_timer) session_revalidation_delay session_revalidation_func;
+ (Xapi_periodic_scheduler.Periodic !Xapi_globs.session_revalidation_interval) session_revalidation_delay session_revalidation_func;
if master then Xapi_periodic_scheduler.add_to_queue "Trying to update subjects' info using external directory service (if any)"
- (Xapi_periodic_scheduler.Periodic update_all_subjects_timer) update_all_subjects_delay update_all_subjects_func;
- Xapi_periodic_scheduler.add_to_queue "Logrotate" (Xapi_periodic_scheduler.Periodic logrotate_timer) 120.0 logrotate_func;
+ (Xapi_periodic_scheduler.Periodic !Xapi_globs.update_all_subjects_interval) update_all_subjects_delay update_all_subjects_func;
+ Xapi_periodic_scheduler.add_to_queue "Logrotate" (Xapi_periodic_scheduler.Periodic !Xapi_globs.logrotate_check_interval) 120.0 logrotate_func;
Xapi_periodic_scheduler.add_to_queue "Periodic scheduler heartbeat" (Xapi_periodic_scheduler.Periodic hb_timer) 240.0 hb_func;
Xapi_periodic_scheduler.add_to_queue "Update monitor configuration" (Xapi_periodic_scheduler.Periodic 3600.0) 3600.0 Monitor_rrds.update_configuration_from_master
View
12 ocaml/xapi/xapi_vm_helpers.ml
@@ -742,10 +742,6 @@ let set_memory_target_live ~__context ~self ~target = () (*
Vmopshelpers.with_xs (fun xs -> Balloon.set_memory_target ~xs domid target)
*)
-(** The default upper bound on the length of time to wait *)
-(** for a running VM to reach its current memory target. *)
-let wait_memory_target_timeout_seconds = 256
-
(** The default upper bound on the acceptable difference between *)
(** actual memory usage and target memory usage when waiting for *)
(** a running VM to reach its current memory target. *)
@@ -765,13 +761,13 @@ let is_power_of_2 n =
(** if the time-out counter exceeds its limit, this function *)
(** raises a server error and terminates. *)
let wait_memory_target_live ~__context ~self
- ?(timeout_seconds = wait_memory_target_timeout_seconds)
- ?(tolerance_bytes = wait_memory_target_tolerance_bytes)
- () =
+ ?(timeout_seconds = int_of_float !Xapi_globs.wait_memory_target_timeout)
+ ?(tolerance_bytes = wait_memory_target_tolerance_bytes)
+ () =
let raise_error error =
raise (Api_errors.Server_error (error, [Ref.string_of (Context.get_task_id __context)])) in
let rec wait accumulated_wait_time_seconds =
- if accumulated_wait_time_seconds > wait_memory_target_timeout_seconds
+ if accumulated_wait_time_seconds > timeout_seconds
then raise_error Api_errors.vm_memory_target_wait_timeout;
if TaskHelper.is_cancelling ~__context
then raise_error Api_errors.task_cancelled;
View
2  ocaml/xapi/xapi_vm_snapshot.ml
@@ -75,7 +75,7 @@ let compare_snapid_chunks s1 s2 =
(* to tell us if everything happened nicely. *)
let wait_for_snapshot ~__context ~vm ~xs ~domid ~new_name =
let value = Watch.value_to_appear (snapshot_path ~xs ~domid "status") in
- match Watch.wait_for ~xs ~timeout:(5.*.60.) value with
+ match Watch.wait_for ~xs ~timeout:!Xapi_globs.snapshot_with_quiesce_timeout value with
| "snapshot-created" ->
(* Get the transportable snap ID *)
debug "wait_for_snapshot: getting the transportable ID";
View
5 ocaml/xenops/device.ml
@@ -33,9 +33,6 @@ exception Cdrom
module D = Debug.Debugger(struct let name = "xenops" end)
open D
-let qemu_dm_ready_timeout = 60. *. 20. (* seconds *)
-let qemu_dm_shutdown_timeout = 60. *. 20. (* seconds *)
-
(* keys read by vif udev script (keep in sync with api:scripts/vif) *)
let vif_udev_keys = "promiscuous" :: (List.map (fun x -> "ethtool-" ^ x) [ "rx"; "tx"; "sg"; "tso"; "ufo"; "gso" ])
@@ -1548,7 +1545,7 @@ let get_state ~xs domid =
with _ -> None
(* Returns the allocated vnc port number *)
-let __start ~xs ~dmpath ~restore ?(timeout=qemu_dm_ready_timeout) info domid =
+let __start ~xs ~dmpath ~restore ?(timeout = !Xapi_globs.qemu_dm_ready_timeout) info domid =
debug "Device.Dm.start domid=%d" domid;
let usb' =
if info.usb = [] then
View
11 ocaml/xenops/hotplug.ml
@@ -21,9 +21,6 @@ open Xenstore
module D = Debug.Debugger(struct let name = "hotplug" end)
open D
-(** Time we allow for the hotplug scripts to run before we assume something bad has
- happened and abort *)
-let hotplug_timeout = 60. *. 20. (* seconds *)
(** If we can't execute the losetup program (for example) *)
exception External_command_failure of string
@@ -118,7 +115,7 @@ let wait_for_plug ~xs (x: device) =
Stats.time_this "udev backend add event"
(fun () ->
let path = path_written_by_hotplug_scripts x in
- ignore(Watch.wait_for ~xs ~timeout:hotplug_timeout (Watch.value_to_appear path));
+ ignore(Watch.wait_for ~xs ~timeout:!Xapi_globs.hotplug_timeout (Watch.value_to_appear path));
);
debug "Synchronised ok with hotplug script: %s" (string_of_device x)
with Watch.Timeout _ ->
@@ -130,7 +127,7 @@ let wait_for_unplug ~xs (x: device) =
Stats.time_this "udev backend remove event"
(fun () ->
let path = path_written_by_hotplug_scripts x in
- ignore(Watch.wait_for ~xs ~timeout:hotplug_timeout (Watch.key_to_disappear path));
+ ignore(Watch.wait_for ~xs ~timeout:!Xapi_globs.hotplug_timeout (Watch.key_to_disappear path));
);
debug "Synchronised ok with hotplug script: %s" (string_of_device x)
with Watch.Timeout _ ->
@@ -145,7 +142,7 @@ let wait_for_frontend_plug ~xs (x: device) =
let blkback_error_watch = Watch.value_to_appear (blkback_error_node ~xs x) in
Stats.time_this "udev frontend add event"
(fun () ->
- match Watch.wait_for ~xs ~timeout:hotplug_timeout
+ match Watch.wait_for ~xs ~timeout:!Xapi_globs.hotplug_timeout
(Watch.any_of [ `OK, ok_watch; `Failed, tapdisk_error_watch; `Failed, blkback_error_watch ]) with
| `OK, _ ->
debug "Synchronised ok with frontend hotplug script: %s" (string_of_device x)
@@ -163,7 +160,7 @@ let wait_for_frontend_unplug ~xs (x: device) =
let path = frontend_status_node x in
Stats.time_this "udev frontend remove event"
(fun () ->
- ignore(Watch.wait_for ~xs ~timeout:hotplug_timeout (Watch.key_to_disappear path));
+ ignore(Watch.wait_for ~xs ~timeout:!Xapi_globs.hotplug_timeout (Watch.key_to_disappear path));
);
debug "Synchronised ok with frontend hotplug script: %s" (string_of_device x)
with Watch.Timeout _ ->
View
10 ocaml/xenops/squeezed.ml
@@ -13,7 +13,6 @@
*)
let default_pidfile = "/var/run/squeezed.pid"
let log_file_path = "file:/var/log/squeezed.log"
-let idle_timeout = 10. (* seconds per balancing check *)
open Pervasiveext
open Squeezed_rpc
@@ -216,7 +215,10 @@ let _ =
"Usage: squeezed [-daemon] [-pidfile filename]";
Logs.reset_all [ log_file_path ];
-
+ begin
+ try Xapi_globs.read_external_config ()
+ with e -> debug "Read global variables config from %s failed: %s. Continue with default setting." Xapi_globs.xapi_globs_conf (Printexc.to_string e)
+ end;
debug "Writing reserved-host-memory=%Ld KiB" Squeeze_xen.target_host_free_mem_kib;
with_xc_and_xs (fun _ xs -> xs.Xs.write (reserved_host_memory_path _service) (Int64.to_string Squeeze_xen.target_host_free_mem_kib));
@@ -225,9 +227,9 @@ let _ =
Unixext.mkdir_rec (Filename.dirname !pidfile) 0o755;
Unixext.pidfile_write !pidfile;
- debug "Starting daemon listening on %s with idle_timeout = %.0f" _service idle_timeout;
+ debug "Starting daemon listening on %s with idle_timeout = %.0f" _service !Xapi_globs.squeezed_balance_check_interval;
try
- with_xc_and_xs (fun xc xs -> Rpc.loop ~xc ~xs ~service:_service ~function_table ~idle_timeout ~idle_callback:(idle_callback ~xc ~xs) () );
+ with_xc_and_xs (fun xc xs -> Rpc.loop ~xc ~xs ~service:_service ~function_table ~idle_timeout:!Xapi_globs.squeezed_balance_check_interval ~idle_callback:(idle_callback ~xc ~xs) () );
debug "Graceful shutdown";
exit 0
with e ->
Please sign in to comment.
Something went wrong with that request. Please try again.