Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

CLEARWATER: CA-96886: Fix race for a PCI device by multiple starting VMs #1075

Merged
merged 1 commit into from

2 participants

@robhoes
Owner

When multiple simultaneously starting VMs try to grab the same PCI device, only one of them
may get it, and the others must fail to start. However, xapi considers a PCI device "assigned"
to a VM only when the VM has started and the device has been made available to the VM, which
is some time after xapi has asked xenopsd to start the VM and give it the PCI device. This means
that there is a window during which multiple VM may be given the same device.

This patch attempts to resolve this by temporarily reserving a PCI device for a VM in xapi, before telling xenopsd to start the VM.

This is a copy of #994 for Clearwater

@malcolmcrossley
Collaborator

Hi Rob, Sorry about the delay but I was trying to keep the clearwater-ring3 branch clean whilst trying to get the "clearwater-trunk-merge" branch in.

Can you rebase this pull request onto of the current clearwater branch please?

@robhoes robhoes CA-96886: Fix race for a PCI device by multiple starting VMs
When multiple simultaneously starting VMs try to grab the same PCI device, only one of them
may get it, and the others must fail to start. However, xapi considers a PCI device "assigned"
to a VM only when the VM has started and the device has been made available to the VM, which
is some time after xapi has asked xenopsd to start the VM and give it the PCI device. This means
that there is a window during which multiple VM may be given the same device.

This patch attempts to resolve this by temporarily reserving a PCI device for a VM in xapi, before telling xenopsd to start the VM.

Signed-off-by: Rob Hoes <rob.hoes@citrix.com>
91e2c38
@robhoes
Owner

@malcolmcrossley Ok, done.

@malcolmcrossley malcolmcrossley merged commit 37dbc4a into xapi-project:clearwater

1 check was pending

default Merged build triggered.
@robhoes robhoes deleted the robhoes:ca96886-cw branch
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Apr 8, 2013
  1. @robhoes

    CA-96886: Fix race for a PCI device by multiple starting VMs

    robhoes authored
    When multiple simultaneously starting VMs try to grab the same PCI device, only one of them
    may get it, and the others must fail to start. However, xapi considers a PCI device "assigned"
    to a VM only when the VM has started and the device has been made available to the VM, which
    is some time after xapi has asked xenopsd to start the VM and give it the PCI device. This means
    that there is a window during which multiple VM may be given the same device.
    
    This patch attempts to resolve this by temporarily reserving a PCI device for a VM in xapi, before telling xenopsd to start the VM.
    
    Signed-off-by: Rob Hoes <rob.hoes@citrix.com>
This page is out of date. Refresh to see the latest.
View
2  ocaml/test/OMakefile
@@ -1,5 +1,5 @@
OCAMLPACKS = oUnit sexpr log xmlm stunnel xml-light2 http-svr uuid netdev \
- tapctl rss xenctrl xenctrlext xenstore xenstoreext cpuid pciutil
+ tapctl rss xenctrl xenctrlext xenstore xenstoreext cpuid pciutil oclock
OCAMLINCLUDES = \
../database \
View
46 ocaml/xapi/pciops.ml
@@ -16,12 +16,58 @@ open D
open Listext
open Stringext
+open Threadext
+
+let reservations : (API.ref_PCI, int64) Hashtbl.t = Hashtbl.create 5
+let m = Mutex.create ()
let get_free_functions ~__context pci =
let assignments = List.length (Db.PCI.get_attached_VMs ~__context ~self:pci) in
let functions = Int64.to_int (Db.PCI.get_functions ~__context ~self:pci) in
functions - assignments
+let reserve ~__context pci =
+ Mutex.execute m (fun () ->
+ let pci_id = Db.PCI.get_pci_id ~__context ~self:pci in
+ (* Only attempt to make a reservation if the PCI device is actually free *)
+ if get_free_functions ~__context pci <= 0 then begin
+ debug "PCI device %s is already in use by another VM" pci_id;
+ false
+ end else begin
+ (* Get a timestamp in nano seconds *)
+ let timestamp = Oclock.gettime Oclock.monotonic in
+ let reserved =
+ try
+ let timestamp' = Hashtbl.find reservations pci in
+ if Int64.sub timestamp timestamp' > Int64.of_float 3e11 then begin
+ (* The previous reservation has expired, so remove it *)
+ Hashtbl.remove reservations pci;
+ false
+ end else begin
+ debug "PCI device %s was reserved by another VM just %Ldms ago"
+ pci_id (Int64.div (Int64.sub timestamp timestamp') 1000000L);
+ true
+ end
+ with Not_found ->
+ false
+ in
+ if reserved then
+ false
+ else begin
+ debug "Adding a temporary reservation for PCI device %s" pci_id;
+ Hashtbl.add reservations pci timestamp;
+ true
+ end
+ end
+ )
+
+let unreserve ~__context pci =
+ Mutex.execute m (fun () ->
+ let pci_id = Db.PCI.get_pci_id ~__context ~self:pci in
+ debug "Removing any temporary reservations for PCI device %s" pci_id;
+ Hashtbl.remove reservations pci
+ )
+
let unassign_all_for_vm ~__context vm =
(* Db.VM.set_attached_PCIs ~__context ~self:vm ~value:[] *)
let pcis = Db.VM.get_attached_PCIs ~__context ~self:vm in
View
11 ocaml/xapi/pciops.mli
@@ -14,7 +14,16 @@
(** Module that handles assigning PCI devices to VMs.
* @group Virtual-Machine Management
*)
-
+
+(** Check whether a given PCI device is free. If so, make a temporary
+ * reservation, such that no other VM can steal it before the device
+ * has been passed through. A reservation automatically expires after
+ * 5 minutes. *)
+val reserve : __context:Context.t -> [ `PCI ] Ref.t -> bool
+
+(** Explicitly release any temporary reservation on a given PCI device. *)
+val unreserve : __context:Context.t -> [ `PCI ] Ref.t -> unit
+
(** Check if a given PCI device is free. *)
val get_free_functions : __context:Context.t -> [ `PCI ] Ref.t -> int
View
21 ocaml/xapi/vgpuops.ml
@@ -40,19 +40,22 @@ let create_vgpu ~__context ~vm vgpu available_pgpus pcis =
debug "Create vGPUs";
let compatible_pgpus = Db.GPU_group.get_PGPUs ~__context ~self:vgpu.gpu_group_ref in
let pgpus = List.intersect compatible_pgpus available_pgpus in
- let free_pgpus = List.filter_map (fun pgpu ->
- let pci = Db.PGPU.get_PCI ~__context ~self:pgpu in
- if Pciops.get_free_functions ~__context pci <= 0
- then None
- else Some (pgpu, pci)
- ) pgpus in
- match free_pgpus with
- | [] ->
+ let rec reserve_one = function
+ | [] -> None
+ | pgpu :: remaining ->
+ let pci = Db.PGPU.get_PCI ~__context ~self:pgpu in
+ if Pciops.reserve ~__context pci then
+ Some (pgpu, pci)
+ else
+ reserve_one remaining
+ in
+ match reserve_one pgpus with
+ | None ->
raise (Api_errors.Server_error (Api_errors.vm_requires_gpu, [
Ref.string_of vm;
Ref.string_of vgpu.gpu_group_ref
]))
- | (pgpu, pci) :: _ ->
+ | Some (pgpu, pci) ->
List.filter (fun g -> g <> pgpu) available_pgpus,
pci :: pcis
View
4 ocaml/xapi/xapi_xenops.ml
@@ -1205,6 +1205,10 @@ let update_pci ~__context id =
else if (not attached_in_db) && state.plugged
then Db.PCI.add_attached_VMs ~__context ~self:pci ~value:vm;
+ (* Release any temporary reservations of this PCI device, as it is now permanently
+ * assigned or unassigned. *)
+ Pciops.unreserve ~__context pci;
+
Opt.iter
(fun gpu ->
debug "xenopsd event: Update VGPU %s.%s currently_attached <- %b" (fst id) (snd id) state.plugged;
Something went wrong with that request. Please try again.