From 98d5077c8da01047068dcf0a27be7808b507d6cf Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Fri, 28 Nov 2025 13:12:29 +0800 Subject: [PATCH 1/3] CA-419908: Move module Watcher ahead for future use in module VM Signed-off-by: Ming Lu --- ocaml/xenopsd/xc/xenops_server_xen.ml | 818 +++++++++++++------------- 1 file changed, 409 insertions(+), 409 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 5274569ef4..4f73cff49b 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -26,6 +26,12 @@ open D module RRDD = Rrd_client.Client module StringSet = Set.Make (String) +module IntMap = Map.Make (struct + type t = int + + let compare = compare +end) + let finally = Xapi_stdext_pervasives.Pervasiveext.finally let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute @@ -1166,84 +1172,421 @@ let dm_of ~vm = let vtpm_of ~vm = match vm.Vm.ty with Vm.HVM h -> h.tpm | _ -> None -module VM = struct - open Vm +module Actions = struct + (* CA-76600: the rtc/timeoffset needs to be maintained over a migrate. *) + let store_rtc_timeoffset vm timeoffset = + let _ = + DB.update vm + (Option.map (function {VmExtra.persistent} as extra -> + ( match persistent with + | {VmExtra.ty= Some (Vm.HVM hvm_info); _} -> + let platformdata = + ("timeoffset", timeoffset) + :: List.remove_assoc "timeoffset" persistent.platformdata + in + let persistent = + { + persistent with + VmExtra.ty= Some (Vm.HVM {hvm_info with Vm.timeoffset}) + ; platformdata + } + in + debug "VM = %s; rtc/timeoffset <- %s" vm timeoffset ; + VmExtra.{persistent} + | _ -> + extra + ) + ) + ) + in + () - let will_be_hvm vm = match vm.ty with HVM _ -> true | _ -> false + let xenbus_connected = Xenbus_utils.(int_of Connected) |> string_of_int - let profile_of ~vm = - if will_be_hvm vm then - Some (choose_qemu_dm vm.Xenops_interface.Vm.platformdata) - else - None + let maybe_update_pv_drivers_detected ~xc ~xs domid path = + let vm = get_uuid ~xc domid |> Uuidx.to_string in + Option.iter + (function + | {VmExtra.persistent} -> ( + try + let value = xs.Xs.read path in + let pv_drivers_detected = + match + ( value = xenbus_connected + , persistent.VmExtra.pv_drivers_detected + ) + with + | true, false -> + (* State "connected" (4) means that PV drivers are present for + this device *) + debug "VM = %s; found PV driver evidence on %s (value = %s)" + vm path value ; + true + | false, true -> + (* This device is not connected, while earlier we detected PV + drivers. We conclude that drivers are still present if any + other device is connected. *) + let devices = Device_common.list_frontends ~xs domid in + let found = + (* Return `true` as soon as a device in state 4 is found. *) + List.exists + (fun device -> + try + xs.Xs.read + (Device_common.backend_state_path_of_device ~xs + device + ) + = xenbus_connected + with Xs_protocol.Enoent _ -> false + ) + devices + in + if not found then (* No devices in state "connected" (4) *) + debug "VM = %s; lost PV driver evidence" vm ; + found + | _ -> + (* No change *) + persistent.VmExtra.pv_drivers_detected + in + let updated = + DB.update vm + (Option.map (function {VmExtra.persistent} -> + let persistent = + {persistent with VmExtra.pv_drivers_detected} + in + VmExtra.{persistent} + ) + ) + in + if updated then + Updates.add (Dynamic.Vm vm) internal_updates + with Xs_protocol.Enoent _ -> + warn "Watch event on %s fired but couldn't read from it" path ; + () + (* the path must have disappeared immediately after the watch fired. + Let's treat this as if we never saw it. *) + ) + ) + (DB.read vm) - let dm_of ~vm = dm_of ~vm:vm.Vm.id + let interesting_paths_for_domain domid uuid = + let open Printf in + [ + sprintf "/local/domain/%d/attr" domid + ; sprintf "/local/domain/%d/data/ts" domid + ; sprintf "/local/domain/%d/data/service" domid + ; sprintf "/local/domain/%d/data/pvs_target" domid + ; sprintf "/local/domain/%d/memory/target" domid + ; sprintf "/local/domain/%d/memory/uncooperative" domid + ; sprintf "/local/domain/%d/console/vnc-port" domid + ; sprintf "/local/domain/%d/console/tc-port" domid + ; Service.Qemu.pidxenstore_path_signal domid + ; sprintf "/local/domain/%d/control" domid + ; sprintf "/local/domain/%d/device" domid + ; sprintf "/local/domain/%d/rrd" domid + ; sprintf "/local/domain/%d/vm-data" domid + ; sprintf "/local/domain/%d/feature" domid + ; sprintf "/vm/%s/rtc/timeoffset" uuid + ; sprintf "/local/domain/%d/xenserver/attr" domid + ] - let compute_overhead persistent vcpu_max memory_static_max shadow_multiplier = - let open VmExtra in - let static_max_mib = Memory.mib_of_bytes_used memory_static_max in - let model = - match persistent.ty with - | Some (PV _) -> - Memory.Linux.overhead_mib - | Some (PVinPVH _) -> - Memory.PVinPVH.overhead_mib - | Some (HVM _ | PVH _) -> - Memory.HVM.overhead_mib - | None -> - failwith - "cannot compute memory overhead: unable to determine domain type" + let watch_token domid = Printf.sprintf "xenopsd-xc:domain-%d" domid + + let watches_of_device dev = + let interesting_backend_keys = + [ + "kthread-pid" + ; "tapdisk-pid" + ; "shutdown-done" + ; "hotplug-status" + ; "params" + ; "state" + ] in - model static_max_mib vcpu_max shadow_multiplier |> Memory.bytes_of_mib + let open Device_common in + let be = dev.backend.domid in + let fe = dev.frontend.domid in + let kind = string_of_kind dev.backend.kind in + let devid = dev.frontend.devid in + List.map + (fun k -> + Printf.sprintf "/local/domain/%d/backend/%s/%d/%d/%s" be kind fe devid k + ) + interesting_backend_keys - let shutdown_reason = function - | Reboot -> - Domain.Reboot - | PowerOff -> - Domain.PowerOff - | Suspend -> - Domain.Suspend - | Halt -> - Domain.Halt - | S3Suspend -> - Domain.S3Suspend + let unmanaged_domain domid id = domid > 0 && not (DB.exists id) - (* We compute our initial target at memory reservation time, done before the - domain is created. We consume this information later when the domain is - built. *) - let set_initial_target ~xs domid initial_target = - xs.Xs.write - (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) - (Int64.to_string initial_target) + let found_running_domain _domid id = + Updates.add (Dynamic.Vm id) internal_updates - let get_initial_target ~xs domid = - Int64.of_string - (xs.Xs.read - (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) + let device_watches = ref IntMap.empty + + let domain_appeared _xc _xs domid = + device_watches := IntMap.add domid [] !device_watches + + let domain_disappeared _xc xs domid = + let token = watch_token domid in + List.iter + (fun d -> + List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device d) ) + (try IntMap.find domid !device_watches with Not_found -> []) ; + device_watches := IntMap.remove domid !device_watches ; + (* Anyone blocked on a domain/device operation which won't happen because + the domain just shutdown should be cancelled here. *) + debug "Cancelling watches for: domid %d" domid ; + Cancel_utils.on_shutdown ~xs domid ; + (* Finally, discard any device caching for the domid destroyed *) + DeviceCache.discard device_cache domid - let domain_type_path domid = - Printf.sprintf "/local/domain/%d/domain-type" domid + let qemu_disappeared di xc xs = + match !Xenopsd.action_after_qemu_crash with + | None -> + () + | Some action -> ( + debug "action-after-qemu-crash=%s" action ; + match action with + | "poweroff" -> + (* we do not expect a HVM guest to survive qemu disappearing, so + kill the VM *) + Domain.set_action_request ~xs di.Xenctrl.domid (Some "poweroff") + | "pause" -> + (* useful for debugging qemu *) + Domain.pause ~xc di.Xenctrl.domid + | _ -> + () + ) - let set_domain_type ~xs domid vm = - let domain_type = - match vm.ty with - | HVM _ -> - "hvm" - | PV _ -> - "pv" - | PVinPVH _ -> - "pv-in-pvh" - | PVH _ -> - "pvh" - in - xs.Xs.write (domain_type_path domid) domain_type + let add_device_watch xs dev = + let open Device_common in + debug "Adding watches for: %s" (string_of_device dev) ; + let domid = dev.frontend.domid in + let token = watch_token domid in + List.iter (Xenstore_watch.watch ~xs token) (watches_of_device dev) ; + device_watches := + IntMap.add domid + (dev :: IntMap.find domid !device_watches) + !device_watches - let get_domain_type ~xs di = - try - match xs.Xs.read (domain_type_path di.Xenctrl.domid) with - | "hvm" -> - Domain_HVM + let remove_device_watch xs dev = + let open Device_common in + debug "Removing watches for: %s" (string_of_device dev) ; + let domid = dev.frontend.domid in + let current = IntMap.find domid !device_watches in + let token = watch_token domid in + List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device dev) ; + device_watches := + IntMap.add domid (List.filter (fun x -> x <> dev) current) !device_watches + + let watch_fired xc xs path domains watches = + let look_for_different_devices domid = + if not (Xenstore_watch.IntSet.mem domid watches) then + debug "Ignoring frontend device watch on unmanaged domain: %d" domid + else if not (IntMap.mem domid !device_watches) then + warn + "Xenstore watch fired, but no entry for domid %d in device watches \ + list" + domid + else + let devices = IntMap.find domid !device_watches in + let devices' = Device_common.list_frontends ~xs domid in + let old_devices = + Xapi_stdext_std.Listext.List.set_difference devices devices' + in + let new_devices = + Xapi_stdext_std.Listext.List.set_difference devices' devices + in + List.iter (add_device_watch xs) new_devices ; + List.iter (remove_device_watch xs) old_devices + in + let uuid_of_domain di = + let string_of_domain_handle handle = + Array.to_list handle |> List.map string_of_int |> String.concat "; " + in + match Uuidx.of_int_array di.Xenctrl.handle with + | Some x -> + x + | None -> + failwith + (Printf.sprintf "VM handle for domain %i is an invalid uuid: %a" + di.Xenctrl.domid + (fun () -> string_of_domain_handle) + di.Xenctrl.handle + ) + in + let fire_event_on_vm domid = + let d = int_of_string domid in + let open Xenstore_watch in + if not (IntMap.mem d domains) then + debug "Ignoring watch on shutdown domain %d" d + else + let di = IntMap.find d domains in + let id = Uuidx.to_string (uuid_of_domain di) in + Updates.add (Dynamic.Vm id) internal_updates + in + let fire_event_on_device domid kind devid = + let d = int_of_string domid in + let open Xenstore_watch in + if not (IntMap.mem d domains) then + debug "Ignoring watch on shutdown domain %d" d + else + let di = IntMap.find d domains in + let id = Uuidx.to_string (uuid_of_domain di) in + let update = + match kind with + | "vbd" | "vbd3" | "qdisk" | "9pfs" -> + let devid' = + devid + |> int_of_string + |> Device_number.of_xenstore_key + |> Device_number.to_linux_device + in + Some (Dynamic.Vbd (id, devid')) + | "vif" -> + Some (Dynamic.Vif (id, devid)) + | x -> + debug "Unknown device kind: '%s'" x ; + None + in + Option.iter (fun x -> Updates.add x internal_updates) update + in + let fire_event_on_qemu domid = + let d = int_of_string domid in + let open Xenstore_watch in + if not (IntMap.mem d domains) then + debug "Ignoring qemu-pid-signal watch on shutdown domain %d" d + else + let signal = + try Some (xs.Xs.read (Service.Qemu.pidxenstore_path_signal d)) + with _ -> None + in + match signal with + | None -> + () + | Some signal -> + debug "Received unexpected qemu-pid-signal %s for domid %d" signal d ; + let di = IntMap.find d domains in + let id = Uuidx.to_string (uuid_of_domain di) in + qemu_disappeared di xc xs ; + Updates.add (Dynamic.Vm id) internal_updates + in + match Astring.String.cuts ~empty:false ~sep:"/" path with + | "local" + :: "domain" + :: domid + :: "backend" + :: kind + :: frontend + :: devid + :: key -> + debug + "Watch on backend domid: %s kind: %s -> frontend domid: %s devid: %s" + domid kind frontend devid ; + fire_event_on_device frontend kind devid ; + (* If this event was a state change then this might be the first time we + see evidence of PV drivers *) + if key = ["state"] then + maybe_update_pv_drivers_detected ~xc ~xs (int_of_string frontend) path + | "local" :: "domain" :: frontend :: "device" :: _ -> + look_for_different_devices (int_of_string frontend) + | ["local"; "domain"; domid; "qemu-pid-signal"] -> + fire_event_on_qemu domid + | "local" :: "domain" :: domid :: _ -> + fire_event_on_vm domid + | ["vm"; uuid; "rtc"; "timeoffset"] -> + let timeoffset = try Some (xs.Xs.read path) with _ -> None in + Option.iter + (fun timeoffset -> + (* Store the rtc/timeoffset for migrate *) + store_rtc_timeoffset uuid timeoffset ; + (* Tell the higher-level toolstack about this too *) + Updates.add (Dynamic.Vm uuid) internal_updates + ) + timeoffset + | _ -> + debug "Ignoring unexpected watch: %s" path +end + +module Watcher = Xenstore_watch.WatchXenstore (Actions) + +module VM = struct + open Vm + + let will_be_hvm vm = match vm.ty with HVM _ -> true | _ -> false + + let profile_of ~vm = + if will_be_hvm vm then + Some (choose_qemu_dm vm.Xenops_interface.Vm.platformdata) + else + None + + let dm_of ~vm = dm_of ~vm:vm.Vm.id + + let compute_overhead persistent vcpu_max memory_static_max shadow_multiplier = + let open VmExtra in + let static_max_mib = Memory.mib_of_bytes_used memory_static_max in + let model = + match persistent.ty with + | Some (PV _) -> + Memory.Linux.overhead_mib + | Some (PVinPVH _) -> + Memory.PVinPVH.overhead_mib + | Some (HVM _ | PVH _) -> + Memory.HVM.overhead_mib + | None -> + failwith + "cannot compute memory overhead: unable to determine domain type" + in + model static_max_mib vcpu_max shadow_multiplier |> Memory.bytes_of_mib + + let shutdown_reason = function + | Reboot -> + Domain.Reboot + | PowerOff -> + Domain.PowerOff + | Suspend -> + Domain.Suspend + | Halt -> + Domain.Halt + | S3Suspend -> + Domain.S3Suspend + + (* We compute our initial target at memory reservation time, done before the + domain is created. We consume this information later when the domain is + built. *) + let set_initial_target ~xs domid initial_target = + xs.Xs.write + (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) + (Int64.to_string initial_target) + + let get_initial_target ~xs domid = + Int64.of_string + (xs.Xs.read + (Printf.sprintf "/local/domain/%d/memory/initial-target" domid) + ) + + let domain_type_path domid = + Printf.sprintf "/local/domain/%d/domain-type" domid + + let set_domain_type ~xs domid vm = + let domain_type = + match vm.ty with + | HVM _ -> + "hvm" + | PV _ -> + "pv" + | PVinPVH _ -> + "pv-in-pvh" + | PVH _ -> + "pvh" + in + xs.Xs.write (domain_type_path domid) domain_type + + let get_domain_type ~xs di = + try + match xs.Xs.read (domain_type_path di.Xenctrl.domid) with + | "hvm" -> + Domain_HVM | "pv" -> Domain_PV | "pv-in-pvh" -> @@ -4957,349 +5300,6 @@ module UPDATES = struct let get last timeout = Updates.get "UPDATES.get" last timeout internal_updates end -module IntMap = Map.Make (struct - type t = int - - let compare = compare -end) - -module Actions = struct - (* CA-76600: the rtc/timeoffset needs to be maintained over a migrate. *) - let store_rtc_timeoffset vm timeoffset = - let _ = - DB.update vm - (Option.map (function {VmExtra.persistent} as extra -> - ( match persistent with - | {VmExtra.ty= Some (Vm.HVM hvm_info); _} -> - let platformdata = - ("timeoffset", timeoffset) - :: List.remove_assoc "timeoffset" persistent.platformdata - in - let persistent = - { - persistent with - VmExtra.ty= Some (Vm.HVM {hvm_info with Vm.timeoffset}) - ; platformdata - } - in - debug "VM = %s; rtc/timeoffset <- %s" vm timeoffset ; - VmExtra.{persistent} - | _ -> - extra - ) - ) - ) - in - () - - let xenbus_connected = Xenbus_utils.(int_of Connected) |> string_of_int - - let maybe_update_pv_drivers_detected ~xc ~xs domid path = - let vm = get_uuid ~xc domid |> Uuidx.to_string in - Option.iter - (function - | {VmExtra.persistent} -> ( - try - let value = xs.Xs.read path in - let pv_drivers_detected = - match - ( value = xenbus_connected - , persistent.VmExtra.pv_drivers_detected - ) - with - | true, false -> - (* State "connected" (4) means that PV drivers are present for - this device *) - debug "VM = %s; found PV driver evidence on %s (value = %s)" - vm path value ; - true - | false, true -> - (* This device is not connected, while earlier we detected PV - drivers. We conclude that drivers are still present if any - other device is connected. *) - let devices = Device_common.list_frontends ~xs domid in - let found = - (* Return `true` as soon as a device in state 4 is found. *) - List.exists - (fun device -> - try - xs.Xs.read - (Device_common.backend_state_path_of_device ~xs - device - ) - = xenbus_connected - with Xs_protocol.Enoent _ -> false - ) - devices - in - if not found then (* No devices in state "connected" (4) *) - debug "VM = %s; lost PV driver evidence" vm ; - found - | _ -> - (* No change *) - persistent.VmExtra.pv_drivers_detected - in - let updated = - DB.update vm - (Option.map (function {VmExtra.persistent} -> - let persistent = - {persistent with VmExtra.pv_drivers_detected} - in - VmExtra.{persistent} - ) - ) - in - if updated then - Updates.add (Dynamic.Vm vm) internal_updates - with Xs_protocol.Enoent _ -> - warn "Watch event on %s fired but couldn't read from it" path ; - () - (* the path must have disappeared immediately after the watch fired. - Let's treat this as if we never saw it. *) - ) - ) - (DB.read vm) - - let interesting_paths_for_domain domid uuid = - let open Printf in - [ - sprintf "/local/domain/%d/attr" domid - ; sprintf "/local/domain/%d/data/ts" domid - ; sprintf "/local/domain/%d/data/service" domid - ; sprintf "/local/domain/%d/data/pvs_target" domid - ; sprintf "/local/domain/%d/memory/target" domid - ; sprintf "/local/domain/%d/memory/uncooperative" domid - ; sprintf "/local/domain/%d/console/vnc-port" domid - ; sprintf "/local/domain/%d/console/tc-port" domid - ; Service.Qemu.pidxenstore_path_signal domid - ; sprintf "/local/domain/%d/control" domid - ; sprintf "/local/domain/%d/device" domid - ; sprintf "/local/domain/%d/rrd" domid - ; sprintf "/local/domain/%d/vm-data" domid - ; sprintf "/local/domain/%d/feature" domid - ; sprintf "/vm/%s/rtc/timeoffset" uuid - ; sprintf "/local/domain/%d/xenserver/attr" domid - ] - - let watch_token domid = Printf.sprintf "xenopsd-xc:domain-%d" domid - - let watches_of_device dev = - let interesting_backend_keys = - [ - "kthread-pid" - ; "tapdisk-pid" - ; "shutdown-done" - ; "hotplug-status" - ; "params" - ; "state" - ] - in - let open Device_common in - let be = dev.backend.domid in - let fe = dev.frontend.domid in - let kind = string_of_kind dev.backend.kind in - let devid = dev.frontend.devid in - List.map - (fun k -> - Printf.sprintf "/local/domain/%d/backend/%s/%d/%d/%s" be kind fe devid k - ) - interesting_backend_keys - - let unmanaged_domain domid id = domid > 0 && not (DB.exists id) - - let found_running_domain _domid id = - Updates.add (Dynamic.Vm id) internal_updates - - let device_watches = ref IntMap.empty - - let domain_appeared _xc _xs domid = - device_watches := IntMap.add domid [] !device_watches - - let domain_disappeared _xc xs domid = - let token = watch_token domid in - List.iter - (fun d -> - List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device d) - ) - (try IntMap.find domid !device_watches with Not_found -> []) ; - device_watches := IntMap.remove domid !device_watches ; - (* Anyone blocked on a domain/device operation which won't happen because - the domain just shutdown should be cancelled here. *) - debug "Cancelling watches for: domid %d" domid ; - Cancel_utils.on_shutdown ~xs domid ; - (* Finally, discard any device caching for the domid destroyed *) - DeviceCache.discard device_cache domid - - let qemu_disappeared di xc xs = - match !Xenopsd.action_after_qemu_crash with - | None -> - () - | Some action -> ( - debug "action-after-qemu-crash=%s" action ; - match action with - | "poweroff" -> - (* we do not expect a HVM guest to survive qemu disappearing, so - kill the VM *) - Domain.set_action_request ~xs di.Xenctrl.domid (Some "poweroff") - | "pause" -> - (* useful for debugging qemu *) - Domain.pause ~xc di.Xenctrl.domid - | _ -> - () - ) - - let add_device_watch xs dev = - let open Device_common in - debug "Adding watches for: %s" (string_of_device dev) ; - let domid = dev.frontend.domid in - let token = watch_token domid in - List.iter (Xenstore_watch.watch ~xs token) (watches_of_device dev) ; - device_watches := - IntMap.add domid - (dev :: IntMap.find domid !device_watches) - !device_watches - - let remove_device_watch xs dev = - let open Device_common in - debug "Removing watches for: %s" (string_of_device dev) ; - let domid = dev.frontend.domid in - let current = IntMap.find domid !device_watches in - let token = watch_token domid in - List.iter (Xenstore_watch.unwatch ~xs token) (watches_of_device dev) ; - device_watches := - IntMap.add domid (List.filter (fun x -> x <> dev) current) !device_watches - - let watch_fired xc xs path domains watches = - let look_for_different_devices domid = - if not (Xenstore_watch.IntSet.mem domid watches) then - debug "Ignoring frontend device watch on unmanaged domain: %d" domid - else if not (IntMap.mem domid !device_watches) then - warn - "Xenstore watch fired, but no entry for domid %d in device watches \ - list" - domid - else - let devices = IntMap.find domid !device_watches in - let devices' = Device_common.list_frontends ~xs domid in - let old_devices = - Xapi_stdext_std.Listext.List.set_difference devices devices' - in - let new_devices = - Xapi_stdext_std.Listext.List.set_difference devices' devices - in - List.iter (add_device_watch xs) new_devices ; - List.iter (remove_device_watch xs) old_devices - in - let uuid_of_domain di = - let string_of_domain_handle handle = - Array.to_list handle |> List.map string_of_int |> String.concat "; " - in - match Uuidx.of_int_array di.Xenctrl.handle with - | Some x -> - x - | None -> - failwith - (Printf.sprintf "VM handle for domain %i is an invalid uuid: %a" - di.Xenctrl.domid - (fun () -> string_of_domain_handle) - di.Xenctrl.handle - ) - in - let fire_event_on_vm domid = - let d = int_of_string domid in - let open Xenstore_watch in - if not (IntMap.mem d domains) then - debug "Ignoring watch on shutdown domain %d" d - else - let di = IntMap.find d domains in - let id = Uuidx.to_string (uuid_of_domain di) in - Updates.add (Dynamic.Vm id) internal_updates - in - let fire_event_on_device domid kind devid = - let d = int_of_string domid in - let open Xenstore_watch in - if not (IntMap.mem d domains) then - debug "Ignoring watch on shutdown domain %d" d - else - let di = IntMap.find d domains in - let id = Uuidx.to_string (uuid_of_domain di) in - let update = - match kind with - | "vbd" | "vbd3" | "qdisk" | "9pfs" -> - let devid' = - devid - |> int_of_string - |> Device_number.of_xenstore_key - |> Device_number.to_linux_device - in - Some (Dynamic.Vbd (id, devid')) - | "vif" -> - Some (Dynamic.Vif (id, devid)) - | x -> - debug "Unknown device kind: '%s'" x ; - None - in - Option.iter (fun x -> Updates.add x internal_updates) update - in - let fire_event_on_qemu domid = - let d = int_of_string domid in - let open Xenstore_watch in - if not (IntMap.mem d domains) then - debug "Ignoring qemu-pid-signal watch on shutdown domain %d" d - else - let signal = - try Some (xs.Xs.read (Service.Qemu.pidxenstore_path_signal d)) - with _ -> None - in - match signal with - | None -> - () - | Some signal -> - debug "Received unexpected qemu-pid-signal %s for domid %d" signal d ; - let di = IntMap.find d domains in - let id = Uuidx.to_string (uuid_of_domain di) in - qemu_disappeared di xc xs ; - Updates.add (Dynamic.Vm id) internal_updates - in - match Astring.String.cuts ~empty:false ~sep:"/" path with - | "local" - :: "domain" - :: domid - :: "backend" - :: kind - :: frontend - :: devid - :: key -> - debug - "Watch on backend domid: %s kind: %s -> frontend domid: %s devid: %s" - domid kind frontend devid ; - fire_event_on_device frontend kind devid ; - (* If this event was a state change then this might be the first time we - see evidence of PV drivers *) - if key = ["state"] then - maybe_update_pv_drivers_detected ~xc ~xs (int_of_string frontend) path - | "local" :: "domain" :: frontend :: "device" :: _ -> - look_for_different_devices (int_of_string frontend) - | ["local"; "domain"; domid; "qemu-pid-signal"] -> - fire_event_on_qemu domid - | "local" :: "domain" :: domid :: _ -> - fire_event_on_vm domid - | ["vm"; uuid; "rtc"; "timeoffset"] -> - let timeoffset = try Some (xs.Xs.read path) with _ -> None in - Option.iter - (fun timeoffset -> - (* Store the rtc/timeoffset for migrate *) - store_rtc_timeoffset uuid timeoffset ; - (* Tell the higher-level toolstack about this too *) - Updates.add (Dynamic.Vm uuid) internal_updates - ) - timeoffset - | _ -> - debug "Ignoring unexpected watch: %s" path -end - -module Watcher = Xenstore_watch.WatchXenstore (Actions) - (* Here we analyse common startup errors in more detail and suggest the most likely fixes (e.g. switch to root, start missing service) *) From 69679832d93b8550cdc9de69ec396551d97e0879 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Fri, 28 Nov 2025 12:50:37 +0800 Subject: [PATCH 2/3] CA-419908: Update xenstore watcher to refresh domains when VM is renamed The xenstore watcher maintains a map from domid to VM UUID. This map is used to dispatch the xenstore events. When the VM is renamed, its UUID changes. Hence this map needs to refresh. Otherwise, the xenstore events could not be dispatched to renamed VM. Signed-off-by: Ming Lu --- ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml | 14 +++++++++++--- ocaml/xenopsd/xc/xenops_server_xen.ml | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml b/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml index e552ecb1e5..b90f3e621c 100644 --- a/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml +++ b/ocaml/libs/ezxenstore/watch/ez_xenstore_watch.ml @@ -115,6 +115,10 @@ module Make (Debug : DEBUG) = struct in List.map fst (IntMap.bindings c) + let need_refresh_domains = Atomic.make false + + let mark_refresh_domains () = Atomic.set need_refresh_domains true + let with_xc_and_xs f = Xenctrl.with_intf (fun xc -> with_xs (fun xs -> f xc xs)) @@ -196,9 +200,13 @@ module Make (Debug : DEBUG) = struct in let process_one_watch c (path, _token) = - if path = _introduceDomain || path = _releaseDomain then - look_for_different_domains () - else + if + Atomic.exchange need_refresh_domains false + || path = _introduceDomain + || path = _releaseDomain + then + look_for_different_domains () ; + if path <> _introduceDomain && path <> _releaseDomain then Client.immediate c (fun h -> let xs = Xs.ops h in Actions.watch_fired xc xs path !domains !watches diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 4f73cff49b..101568106a 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -1995,7 +1995,8 @@ module VM = struct ) ; debug "Moving xenstore tree" ; Domain.move_xstree ~xs di.Xenctrl.domid old_name new_name ; - DB.rename old_name new_name + DB.rename old_name new_name ; + Watcher.mark_refresh_domains () in Option.iter rename_domain (di_of_uuid ~xc (uuid_of_string old_name)) From 1b7f1490335ca7856c0ed3a68cfde4b1ed70eb79 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Mon, 1 Dec 2025 10:58:18 +0800 Subject: [PATCH 3/3] Refactor IntMap to use built-in Int module in Map.Make Signed-off-by: Ming Lu --- ocaml/xenopsd/xc/xenops_server_xen.ml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index 101568106a..3e7bd2a358 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -25,12 +25,7 @@ module D = Debug.Make (struct let name = service_name end) open D module RRDD = Rrd_client.Client module StringSet = Set.Make (String) - -module IntMap = Map.Make (struct - type t = int - - let compare = compare -end) +module IntMap = Map.Make (Int) let finally = Xapi_stdext_pervasives.Pervasiveext.finally