95 changes: 91 additions & 4 deletions ocaml/tests/test_ha_vm_failover.ml
@@ -58,6 +58,7 @@ type pool = {
master: host;
slaves: host list;
ha_host_failures_to_tolerate: int64;
cluster: int;
}

let string_of_vm {memory; name_label} =
@@ -68,12 +69,13 @@ let string_of_host {memory_total; name_label; vms} =
memory_total name_label
(Test_printers.list string_of_vm vms)

let string_of_pool {master; slaves; ha_host_failures_to_tolerate} =
let string_of_pool {master; slaves; ha_host_failures_to_tolerate; cluster} =
Printf.sprintf
"{master = %s; slaves = %s; ha_host_failures_to_tolerate = %Ld}"
"{master = %s; slaves = %s; ha_host_failures_to_tolerate = %Ld; cluster = %d}"
(string_of_host master)
(Test_printers.list string_of_host slaves)
ha_host_failures_to_tolerate
cluster

let load_vm ~__context ~(vm:vm) ~local_sr ~shared_sr ~local_net ~shared_net =
let vm_ref = make_vm ~__context
@@ -120,10 +122,9 @@ let load_host ~__context ~host ~local_sr ~shared_sr ~local_net ~shared_net =
in
host_ref

let setup ~__context {master; slaves; ha_host_failures_to_tolerate} =
let setup ~__context {master; slaves; ha_host_failures_to_tolerate; cluster} =
let shared_sr = make_sr ~__context ~shared:true () in
let shared_net = make_network ~__context ~bridge:"xenbr0" () in

(* Remove all hosts added by make_test_database *)
List.iter (fun host -> Db.Host.destroy ~__context ~self:host) (Db.Host.get_all ~__context);

@@ -149,6 +150,13 @@ let setup ~__context {master; slaves; ha_host_failures_to_tolerate} =
let master_ref = load_host_and_local_resources master in
let (_ : API.ref_host list) = List.map load_host_and_local_resources slaves in

if cluster > 0 then
Test_common.make_cluster_and_cluster_host ~__context () |> ignore;
for i = 0 to (cluster - 1) do
let host = List.nth (Db.Host.get_all ~__context) i in
Test_common.make_cluster_host ~__context ~host () |> ignore;
done;

let pool = Db.Pool.get_all ~__context |> List.hd in

Db.Pool.set_master ~__context ~self:pool ~value:master_ref;
@@ -181,6 +189,7 @@ module AllProtectedVms = Generic.Make(Generic.EncapsulateState(struct
master = {memory_total = gib 256L; name_label = "master"; vms = []};
slaves = [];
ha_host_failures_to_tolerate = 0L;
cluster = 0;
},
[];
(* One unprotected VM. *)
@@ -194,6 +203,7 @@ module AllProtectedVms = Generic.Make(Generic.EncapsulateState(struct
};
slaves = [];
ha_host_failures_to_tolerate = 0L;
cluster = 0;
},
[];
(* One VM which would be protected if it was running. *)
@@ -204,6 +214,7 @@ module AllProtectedVms = Generic.Make(Generic.EncapsulateState(struct
};
slaves = [];
ha_host_failures_to_tolerate = 0L;
cluster = 0;
},
[];
(* One protected VM. *)
@@ -214,6 +225,7 @@ module AllProtectedVms = Generic.Make(Generic.EncapsulateState(struct
};
slaves = [];
ha_host_failures_to_tolerate = 0L;
cluster = 0;
},
["vm"];
(* One protected VM and one unprotected VM. *)
@@ -231,6 +243,7 @@ module AllProtectedVms = Generic.Make(Generic.EncapsulateState(struct
};
slaves = [];
ha_host_failures_to_tolerate = 0L;
cluster = 0;
},
["vm1"];
]
@@ -270,6 +283,7 @@ module PlanForNFailures = Generic.Make(Generic.EncapsulateState(struct
{memory_total = gib 256L; name_label = "slave"; vms = []}
];
ha_host_failures_to_tolerate = 1L;
cluster = 0;
},
Xapi_ha_vm_failover.Plan_exists_for_all_VMs
);
@@ -288,6 +302,7 @@ module PlanForNFailures = Generic.Make(Generic.EncapsulateState(struct
{memory_total = gib 256L; name_label = "slave"; vms = []}
];
ha_host_failures_to_tolerate = 1L;
cluster = 0;
},
Xapi_ha_vm_failover.Plan_exists_for_all_VMs
);
@@ -311,6 +326,7 @@ module PlanForNFailures = Generic.Make(Generic.EncapsulateState(struct
{memory_total = gib 256L; name_label = "slave"; vms = []}
];
ha_host_failures_to_tolerate = 1L;
cluster = 0;
},
Xapi_ha_vm_failover.Plan_exists_for_all_VMs
);
@@ -346,6 +362,7 @@ module PlanForNFailures = Generic.Make(Generic.EncapsulateState(struct
}
];
ha_host_failures_to_tolerate = 1L;
cluster = 0;
},
Xapi_ha_vm_failover.No_plan_exists
);
@@ -415,6 +432,7 @@ module AssertNewVMPreservesHAPlan = Generic.Make(Generic.EncapsulateState(struct
{memory_total = gib 256L; name_label = "slave"; vms = []}
];
ha_host_failures_to_tolerate = 1L;
cluster = 0;
},
{basic_vm with
ha_always_run = false;
@@ -445,6 +463,7 @@ module AssertNewVMPreservesHAPlan = Generic.Make(Generic.EncapsulateState(struct
{memory_total = gib 256L; name_label = "slave"; vms = []}
];
ha_host_failures_to_tolerate = 1L;
cluster = 0;
},
{basic_vm with
ha_always_run = false;
@@ -483,6 +502,7 @@ module AssertNewVMPreservesHAPlan = Generic.Make(Generic.EncapsulateState(struct
};
];
ha_host_failures_to_tolerate = 1L;
cluster = 0;
},
{basic_vm with
ha_always_run = false;
@@ -495,11 +515,78 @@ module AssertNewVMPreservesHAPlan = Generic.Make(Generic.EncapsulateState(struct
]
end))

module ComputeMaxFailures = Generic.Make(Generic.EncapsulateState(struct
module Io = struct
open Xapi_ha_vm_failover

type input_t = pool
type output_t = int

let string_of_input_t = string_of_pool
let string_of_output_t = string_of_int
end

module State = Test_state.XapiDb

let load_input __context = setup ~__context

let extract_output __context pool =
let max_hosts = Xapi_ha_vm_failover.compute_max_host_failures_to_tolerate ~__context () in
(* The functor's Io signature requires an input_t, but it is not used here *)
pool |> ignore;
Int64.to_int max_hosts

let tests = [
(* Three host pool with no VMs. *)
(
{
master = {memory_total = gib 256L; name_label = "master"; vms = []};
slaves = [
{memory_total = gib 256L; name_label = "slave1"; vms = []};
{memory_total = gib 256L; name_label = "slave2"; vms = []}
];
(* Placeholder value that is overridden when we call the compute function *)
ha_host_failures_to_tolerate = 3L;
cluster = 3;
},
(* Assert that compute ha host failures to tolerate returns 1 *)
1
);
(* Two host pool with no VMs. *)
(
{
master = {memory_total = gib 256L; name_label = "master"; vms = []};
slaves = [
{memory_total = gib 256L; name_label = "slave1"; vms = []}
];
ha_host_failures_to_tolerate = 2L;
Contributor:

If your pool only has 2 hosts, how can you lose them all? I think you need some actual VMs for the tests to make sense; if you have no VMs you might as well turn the whole pool off, so that's not a realistic scenario.

Author:

See comment above. This number is inserted into the database as part of the setup but is overridden when we call the compute function. For 2 hosts we currently expect to tolerate 0 host failures, as per the expected result three lines below.

cluster = 2;
},
(* Assert that compute ha host failures to tolerate returns 0 *)
0
);
(* Two host pool with one down *)
(
{
master = {memory_total = gib 256L; name_label = "master"; vms = []};
slaves = [
{memory_total = gib 256L; name_label = "slave1"; vms = []}
];
ha_host_failures_to_tolerate = 2L;
cluster = 1;
Author:

Updated so that cluster now dictates how many hosts to enable clustering on.

},
(* Assert that compute ha host failures to tolerate returns 1 *)
1
);
]
end))

let test =
"test_ha_vm_failover" >:::
[
"test_all_protected_vms" >::: AllProtectedVms.tests;
"test_plan_for_n_failures" >::: PlanForNFailures.tests;
"test_assert_new_vm_preserves_ha_plan" >:::
AssertNewVMPreservesHAPlan.tests;
"test_corosync_max_host_failures" >::: ComputeMaxFailures.tests;
]
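
For reference, a minimal sketch of how the aggregated test value above could be run on its own, assuming the stock OUnit runner is available; in the xapi tree this suite is normally picked up by the project's own test executable, so the standalone runner below is only an illustration.

(* Standalone runner sketch: assumes plain OUnit is on the path;
   xapi's test binary normally aggregates [test] itself. *)
let () =
  ignore (OUnit.run_test_tt_main test)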
19 changes: 11 additions & 8 deletions ocaml/xapi/message_forwarding.ml
@@ -591,14 +591,16 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct
Local.Pool.ha_failover_plan_exists ~__context ~n

let ha_compute_max_host_failures_to_tolerate ~__context =
info "Pool.ha_compute_max_host_failures_to_tolerate: pool = '%s'" (current_pool_uuid ~__context);
Local.Pool.ha_compute_max_host_failures_to_tolerate ~__context
Xapi_clustering.with_clustering_lock_if_cluster_exists ~__context (fun () ->
info "Pool.ha_compute_max_host_failures_to_tolerate: pool = '%s'" (current_pool_uuid ~__context);
Local.Pool.ha_compute_max_host_failures_to_tolerate ~__context)

let ha_compute_hypothetical_max_host_failures_to_tolerate ~__context ~configuration =
info "Pool.ha_compute_hypothetical_max_host_failures_to_tolerate: pool = '%s'; configuration = [ %s ]"
(current_pool_uuid ~__context)
(String.concat "; " (List.map (fun (vm, p) -> Ref.string_of vm ^ " " ^ p) configuration));
Local.Pool.ha_compute_hypothetical_max_host_failures_to_tolerate ~__context ~configuration
Xapi_clustering.with_clustering_lock_if_cluster_exists ~__context (fun () ->
info "Pool.ha_compute_hypothetical_max_host_failures_to_tolerate: pool = '%s'; configuration = [ %s ]"
(current_pool_uuid ~__context)
(String.concat "; " (List.map (fun (vm, p) -> Ref.string_of vm ^ " " ^ p) configuration));
Local.Pool.ha_compute_hypothetical_max_host_failures_to_tolerate ~__context ~configuration)

let ha_compute_vm_failover_plan ~__context ~failed_hosts ~failed_vms =
info "Pool.ha_compute_vm_failover_plan: pool = '%s'; failed_hosts = [ %s ]; failed_vms = [ %s ]"
@@ -608,8 +610,9 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct
Local.Pool.ha_compute_vm_failover_plan ~__context ~failed_hosts ~failed_vms

let set_ha_host_failures_to_tolerate ~__context ~self ~value =
info "Pool.set_ha_host_failures_to_tolerate: pool = '%s'; value = %Ld" (pool_uuid ~__context self) value;
Local.Pool.set_ha_host_failures_to_tolerate ~__context ~self ~value
Xapi_clustering.with_clustering_lock_if_cluster_exists ~__context (fun () ->
info "Pool.set_ha_host_failures_to_tolerate: pool = '%s'; value = %Ld" (pool_uuid ~__context self) value;
Local.Pool.set_ha_host_failures_to_tolerate ~__context ~self ~value)

let ha_schedule_plan_recomputation ~__context =
info "Pool.ha_schedule_plan_recomputation: pool = '%s'" (current_pool_uuid ~__context);
12 changes: 12 additions & 0 deletions ocaml/xapi/xapi_clustering.ml
@@ -102,6 +102,11 @@ let with_clustering_lock_if_needed ~__context ~sr_sm_type f =
| [] -> f ()
| _required_cluster_stacks -> with_clustering_lock f

let with_clustering_lock_if_cluster_exists ~__context f =
match Db.Cluster.get_all ~__context with
| [] -> f ()
| _ -> with_clustering_lock f

let find_cluster_host ~__context ~host =
match Db.Cluster_host.get_refs_where ~__context
~expr:(Db_filter_types.(Eq (Field "host", Literal (Ref.string_of host)))) with
@@ -203,3 +208,10 @@ let is_clustering_disabled_on_host ~__context host =
match find_cluster_host ~__context ~host with
| None -> true (* there is no Cluster_host, therefore it is not enabled, therefore it is disabled *)
| Some cluster_host -> not (Db.Cluster_host.get_enabled ~__context ~self:cluster_host)

let compute_corosync_max_host_failures ~__context =
let all_hosts = Db.Host.get_all ~__context in
let nhosts = List.length (all_hosts) in
let disabled_hosts = List.length (List.filter (fun host -> is_clustering_disabled_on_host ~__context host = true ) all_hosts) in
let corosync_ha_max_hosts = ((nhosts - disabled_hosts - 1) / 2) + disabled_hosts in
corosync_ha_max_hosts
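
As a sanity check, the quorum arithmetic above reproduces the expected values in the ComputeMaxFailures test cases earlier in this diff. The corosync_max helper below is a hypothetical local sketch of the same formula, not part of the patch.

(* Sketch: the same arithmetic as compute_corosync_max_host_failures,
   evaluated for the three test pools; integer division throughout. *)
let corosync_max ~nhosts ~disabled =
  ((nhosts - disabled - 1) / 2) + disabled

let () =
  assert (corosync_max ~nhosts:3 ~disabled:0 = 1); (* three hosts, clustering on all: tolerate 1 *)
  assert (corosync_max ~nhosts:2 ~disabled:0 = 0); (* two hosts, clustering on both: tolerate 0 *)
  assert (corosync_max ~nhosts:2 ~disabled:1 = 1)  (* two hosts, clustering on one: tolerate 1 *)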