Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ocaml/idl/datamodel_errors.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1129,7 +1129,7 @@ let _ =
error Api_errors.clustering_disabled ["cluster_host"]
~doc:"An operation was attempted while clustering was disabled on the cluster_host." ();
error Api_errors.cluster_does_not_have_one_node ["number_of_nodes"]
~doc:"The cluster does not have only one node." ();
~doc:"An operation failed as it expected the cluster to have only one node but found multiple cluster_hosts." ();
error Api_errors.no_compatible_cluster_host ["host"]
~doc:"The host does not have a Cluster_host with a compatible cluster stack." ();
error Api_errors.cluster_force_destroy_failed ["cluster"]
Expand Down
85 changes: 0 additions & 85 deletions ocaml/quicktest/quicktest_cluster.ml

This file was deleted.

4 changes: 1 addition & 3 deletions ocaml/tests/test_clustering.ml
Original file line number Diff line number Diff line change
Expand Up @@ -621,9 +621,7 @@ let test_pool_ha_cluster_stacks_with_ha_with_clustering () =
assert_cluster_stack_is default_smapiv3 ~__context;

Db.Pool.set_ha_enabled ~__context ~self:pool ~value:true;
Xapi_cluster_host.force_destroy ~__context ~self:cluster_host2;
assert_cluster_stack_is default_smapiv3 ~__context;
Xapi_cluster.destroy ~__context ~self:cluster2;
Xapi_cluster.pool_destroy ~__context ~self:cluster2;
assert_cluster_stack_is default ~__context


Expand Down
17 changes: 16 additions & 1 deletion ocaml/xapi/message_forwarding.ml
Original file line number Diff line number Diff line change
Expand Up @@ -4297,6 +4297,17 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct
end

module Cluster = struct

let forward_cluster_op ~local_fn ~__context op =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: The commit title should say something like: forward Cluster.destroy to a cluster member, localhost if possible.
The goal is to forward to a host that is a member of the cluster, localhost is only an optimization.

let localhost = Helpers.get_localhost ~__context in
let is_local_cluster_host self = Db.Cluster_host.get_host ~__context ~self = localhost in
let host = match Db.Cluster_host.get_all ~__context with
| [] -> Helpers.get_master ~__context
| lst when List.exists is_local_cluster_host lst -> localhost
| self :: _ -> Db.Cluster_host.get_host ~__context ~self
in
do_op_on ~local_fn ~__context ~host op

let create ~__context ~pIF ~cluster_stack ~pool_auto_join ~token_timeout ~token_timeout_coefficient =
info "Cluster.create";
let pool = Helpers.get_pool ~__context in (* assumes 1 pool in DB *)
Expand All @@ -4311,7 +4322,10 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct
info "Cluster.destroy cluster: %s" (Ref.string_of self);
Xapi_cluster_helpers.with_cluster_operation ~__context ~self ~doc:"Cluster.destroy" ~op:`destroy
(fun () ->
Local.Cluster.destroy ~__context ~self)
let local_fn = Local.Cluster.destroy ~self in
forward_cluster_op ~__context ~local_fn
(fun rpc session_id -> Client.Cluster.destroy session_id rpc self)
)

let get_network ~__context ~self =
info "Cluster.get_network";
Expand Down Expand Up @@ -4395,6 +4409,7 @@ module Forward = functor(Local: Custom_actions.CUSTOM_ACTIONS) -> struct

let forget ~__context ~self =
info "Cluster_host.forget cluster_host:%s" (Ref.string_of self);
Db.Cluster_host.set_joined ~__context ~self ~value:false;
let cluster = Db.Cluster_host.get_cluster ~__context ~self in
let local_fn = Local.Cluster_host.forget ~self in
(* We need to ask another host that has a cluster host to mark it as dead.
Expand Down
143 changes: 63 additions & 80 deletions ocaml/xapi/xapi_cluster.ml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ let create ~__context ~pIF ~cluster_stack ~pool_auto_join ~token_timeout ~token_
let destroy ~__context ~self =
let cluster_hosts = Db.Cluster.get_cluster_hosts ~__context ~self in
let cluster_host = match cluster_hosts with
| [] -> None
| [] ->
info "No cluster_hosts found. Proceeding with cluster destruction.";
None
| [ cluster_host ] -> Some (cluster_host)
| _ ->
let n = List.length cluster_hosts in
Expand All @@ -93,10 +95,12 @@ let destroy ~__context ~self =
set_ha_cluster_stack ~__context;
Xapi_clustering.Daemon.disable ~__context

let get_network ~__context ~self =
get_network_internal ~__context ~self
(* Get pool master's cluster_host, return network of PIF *)
let get_network = get_network_internal

(** Cluster.pool* functions are convenience wrappers for iterating low-level APIs over a pool.
Concurrency checks are done in the implementation of these calls *)

(* helper function; concurrency checks are done in implementation of Cluster.create and Cluster_host.create *)
let pool_create ~__context ~network ~cluster_stack ~token_timeout ~token_timeout_coefficient =
validate_params ~token_timeout ~token_timeout_coefficient;
let master = Helpers.get_master ~__context in
Expand All @@ -108,103 +112,82 @@ let pool_create ~__context ~network ~cluster_stack ~token_timeout ~token_timeout
in

List.iter (fun host ->
(* Cluster.create already created cluster_host on master, so we only need to iterate through slaves *)
(* Cluster.create already created cluster_host on master, so we only iterate through slaves *)
Helpers.call_api_functions ~__context (fun rpc session_id ->
let pifref,_ = pif_of_host ~__context network host in
let cluster_host_ref = Client.Client.Cluster_host.create ~rpc ~session_id ~cluster ~host ~pif:pifref in
let pif,_ = pif_of_host ~__context network host in
let cluster_host_ref = Client.Client.Cluster_host.create ~rpc ~session_id ~cluster ~host ~pif in
D.debug "Created Cluster_host: %s" (Ref.string_of cluster_host_ref);
)) slave_hosts;

cluster

(* Helper function; if opn is None return all, else return those not equal to it *)
let filter_on_option opn xs =
match opn with
| None -> xs
| Some x -> List.filter ((<>) x) xs
let foreach_cluster_host ~__context ~self ~(fn : rpc:(Rpc.call -> Rpc.response) ->
session_id:API.ref_session -> self:API.ref_Cluster_host -> unit) ~log =
let wrapper = if log then log_and_ignore_exn else (fun f -> f ()) in
List.iter (fun self ->
Helpers.call_api_functions ~__context
(fun rpc session_id -> wrapper (fun () -> fn rpc session_id self)))

(* Helper function; concurrency checks are done in implementation of Cluster.destroy and Cluster_host.destroy *)
let pool_force_destroy ~__context ~self =
(* For now we assume we have only one pool, and that the cluster is the same as the pool.
This means that the pool master must be a member of this cluster. *)
let master = Helpers.get_master ~__context in
let master_cluster_host =
Xapi_clustering.find_cluster_host ~__context ~host:master
in
let pool_destroy_common ~__context ~self ~force =
(* Prevent new hosts from joining if destroy fails *)
Db.Cluster.set_pool_auto_join ~__context ~self ~value:false;
let slave_cluster_hosts =
Db.Cluster.get_cluster_hosts ~__context ~self |> filter_on_option master_cluster_host
let all_hosts = Db.Cluster.get_cluster_hosts ~__context ~self in
let master = Helpers.get_master ~__context in
match Xapi_clustering.find_cluster_host ~__context ~host:master with
| None -> all_hosts
| Some master_ch ->
List.filter ((<>) master_ch) all_hosts
in
debug "Destroying cluster_hosts in pool";
(* First try to destroy each cluster_host - if we can do so safely then do *)
List.iter
(fun cluster_host ->
(* We need to run this code on the slave *)
(* We ignore failures here, we'll try a force_destroy after *)
log_and_ignore_exn (fun () ->
Helpers.call_api_functions ~__context (fun rpc session_id ->
Client.Client.Cluster_host.destroy ~rpc ~session_id ~self:cluster_host)
)
)
slave_cluster_hosts;
(* We expect destroy to have failed for some, we'll try to force destroy those *)
(* Note we include the master here, we should attempt to force destroy it *)
let all_remaining_cluster_hosts =
Db.Cluster.get_cluster_hosts ~__context ~self
in
(* Now try to force_destroy, keep track of any errors here *)
let exns = List.fold_left
(fun exns_so_far cluster_host ->
Helpers.call_api_functions ~__context (fun rpc session_id ->
try
Client.Client.Cluster_host.force_destroy ~rpc ~session_id ~self:cluster_host;
exns_so_far
with e ->
Backtrace.is_important e;
let uuid = Client.Client.Cluster_host.get_uuid ~rpc ~session_id ~self:cluster_host in
debug "Ignoring exception while trying to force destroy cluster host %s: %s" uuid (ExnHelper.string_of_exn e);
e :: exns_so_far
)
)
[] all_remaining_cluster_hosts
in
foreach_cluster_host ~__context ~self ~log:force
~fn:Client.Client.Cluster_host.destroy
slave_cluster_hosts

begin match exns with
| [] -> D.debug "Successfully destroyed all cluster_hosts in pool, now destroying cluster %s" (Ref.string_of self)
| e :: _ -> raise Api_errors.(Server_error (cluster_force_destroy_failed, [Ref.string_of self]))
end;
let pool_force_destroy ~__context ~self =
(* Set pool_autojoin:false and try to destroy slave cluster_hosts *)
pool_destroy_common ~__context ~self ~force:true;

(* Note we include the master here, we should attempt to force destroy it *)
(* Now try to force_destroy, keep track of any errors here *)
debug "Ignoring exceptions while trying to force destroy cluster hosts.";
foreach_cluster_host ~__context ~self ~log:true
~fn:Client.Client.Cluster_host.force_destroy
(Db.Cluster.get_cluster_hosts ~__context ~self);

info "Forgetting any cluster_hosts that couldn't be destroyed.";
foreach_cluster_host ~__context ~self ~log:true
~fn:Client.Client.Cluster_host.forget
(Db.Cluster_host.get_all ~__context);

let unforgotten_cluster_hosts = List.filter
(fun self -> not (Db.Cluster_host.get_joined ~__context ~self))
(Db.Cluster_host.get_all ~__context)
in
info "If forget failed on any remaining cluster_hosts, we now delete them competely";
foreach_cluster_host ~__context ~self ~log:false
~fn:(fun ~rpc ~session_id ~self -> Db.Cluster_host.destroy ~__context ~self)
unforgotten_cluster_hosts;

match Db.Cluster_host.get_all ~__context with
| [] ->
D.debug "Successfully destroyed all cluster_hosts in pool, now destroying cluster %s"
(Ref.string_of self);
Helpers.call_api_functions ~__context (fun rpc session_id ->
Client.Client.Cluster.destroy ~rpc ~session_id ~self);
debug "Cluster_host.force_destroy was successful"
debug "Cluster.pool_force_destroy was successful";
| _ -> raise Api_errors.(Server_error (cluster_force_destroy_failed, [Ref.string_of self]))

(* Helper function; concurrency checks are done in implementation of Cluster.destroy and Cluster_host.destroy *)
let pool_destroy ~__context ~self =
(* For now we assume we have only one pool, and that the cluster is the same as the pool.
This means that the pool master must be a member of this cluster. *)
let master = Helpers.get_master ~__context in
let master_cluster_host =
Xapi_clustering.find_cluster_host ~__context ~host:master
|> Xapi_stdext_monadic.Opt.unbox
in
let slave_cluster_hosts =
Db.Cluster.get_cluster_hosts ~__context ~self |> List.filter ((<>) master_cluster_host)
in
(* First destroy the Cluster_host objects of the slaves *)
List.iter
(fun cluster_host ->
(* We need to run this code on the slave *)
Helpers.call_api_functions ~__context (fun rpc session_id ->
Client.Client.Cluster_host.destroy ~rpc ~session_id ~self:cluster_host)
)
slave_cluster_hosts;
(* Set pool_autojoin:false and try to destroy slave cluster_hosts *)
pool_destroy_common ~__context ~self ~force:false;

(* Then destroy the Cluster_host of the pool master and the Cluster itself *)
Helpers.call_api_functions ~__context (fun rpc session_id ->
Client.Client.Cluster.destroy ~rpc ~session_id ~self)

let pool_resync ~__context ~(self : API.ref_Cluster) =
List.iter
(fun host -> log_and_ignore_exn
(fun () ->
(fun host -> log_and_ignore_exn (fun () ->
Xapi_cluster_host.create_as_necessary ~__context ~host;
Xapi_cluster_host.resync_host ~__context ~host;
if is_clustering_disabled_on_host ~__context host
Expand Down
7 changes: 5 additions & 2 deletions ocaml/xapi/xapi_cluster.mli
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ val pool_create : __context:Context.t ->
val pool_force_destroy : __context:Context.t -> self:API.ref_Cluster -> unit
(** [pool_force_destroy ~__context ~self] is the implementation of the XenAPI
method 'Cluster.pool_force_destroy'. This is a convenience function that
attempts to force destroy the Cluster_host objects for all hosts in the pool
and then destroys the Cluster object if it was successful. *)
first attempts to destroy the Cluster_host objects for all hosts in the pool.
Any surviving cluster_hosts are force destroyed, any remaining after that are
forgotten, and any cluster_hosts for which forget fails will be deleted. After
this, the cluster is destroyed, unless there are still cluster_hosts remaining,
in which case the call raises an API error. *)

val pool_destroy : __context:Context.t -> self:API.ref_Cluster -> unit
(** [pool_destroy ~__context ~self] is the implementation of the XenAPI
Expand Down
Loading