Merge pull request #3418 from minishrink/gpu_ppx

CP-26717: Update Xapi to use PPX-based Gpumon IDL
xapi-project · Feb 2, 2018 · 2a78d9d · 2a78d9d
2 parents 10608a2 + 4b3baa8
commit 2a78d9d
Show file tree

Hide file tree

Showing 2 changed files with 40 additions and 41 deletions.
diff --git a/ocaml/xapi/xapi_gpumon.ml b/ocaml/xapi/xapi_gpumon.ml
@@ -45,7 +45,7 @@ module Nvidia = struct
   let get_pgpu_compatibility_metadata ~dbg ~pgpu_pci_address =
     let metadata =
       pgpu_pci_address
-      |> Gpumon_client.Client.Nvidia.get_pgpu_metadata dbg 
+      |> Gpumon_client.Client.Nvidia.get_pgpu_metadata dbg
       |> Stdext.Base64.encode
     in [key, metadata]
 
@@ -70,30 +70,30 @@ module Nvidia = struct
    * one vGPU per VM. The underdyling problem is that there is no
    * mapping between a vGPU and vgpu_instance in the NVIDIA library
    * currently.
-   *)
+  *)
   let get_vgpu_compatibility_metadata ~__context ~vgpu =
     let this = "get_vgpu_compatibility_metadata" in
     try
       let dbg   = Context.string_of_task __context in
       let vm    = Db.VGPU.get_VM ~__context ~self:vgpu in
       let domid = Db.VM.get_domid ~__context ~self:vm |> Int64.to_int in
       Db.VGPU.get_resident_on ~__context ~self:vgpu
-      |> fun self -> Db.PGPU.get_PCI ~__context ~self
-      |> fun self -> Db.PCI.get_pci_id ~__context ~self
+      |> (fun self -> Db.PGPU.get_PCI ~__context ~self)
+      |> (fun self -> Db.PCI.get_pci_id ~__context ~self)
       |> Gpumon_client.Client.Nvidia.get_vgpu_metadata dbg domid
-      |> function
-        | []      -> []
-        | [meta]  -> [key, Stdext.Base64.encode meta]
-        | _::_    -> failwith @@ Printf.sprintf
-          "%s: VM %s (dom %d) has more than one NVIDIA vGPU (%s)"
-          this (Ref.string_of vm) domid __LOC__
+      |> (function
+          | []      -> []
+          | [meta]  -> [key, Stdext.Base64.encode meta]
+          | _::_    -> failwith @@ Printf.sprintf
+              "%s: VM %s (dom %d) has more than one NVIDIA vGPU (%s)"
+              this (Ref.string_of vm) domid __LOC__)
     with
-      | Gpumon_interface.NvmlInterfaceNotAvailable ->
-        let host = Helpers.get_localhost ~__context |> Ref.string_of in
-        raise Api_errors.(Server_error (nvidia_tools_error, [host]))
-      | err ->
-        let msg = Printexc.to_string err in
-        raise Api_errors.(Server_error (internal_error, [msg]))
+    | Gpumon_interface.(Gpumon_error NvmlInterfaceNotAvailable) ->
+      let host = Helpers.get_localhost ~__context |> Ref.string_of in
+      raise Api_errors.(Server_error (nvidia_tools_error, [host]))
+    | err ->
+      let msg = Printexc.to_string err in
+      raise Api_errors.(Server_error (internal_error, [msg]))
 
 
   (* N.B. the vgpu (and the vm) must be in the local host where this function runs *)
@@ -102,30 +102,29 @@ module Nvidia = struct
     let vm_domid = Int64.to_int (Db.VM.get_domid ~__context ~self:vm) in
     let pgpu_metadata = Stdext.Base64.decode encoded_pgpu_metadata in
     match vgpu_impl ~__context vgpu with
-    | `passthrough | `gvt_g | `mxgpu -> 
+    | `passthrough | `gvt_g | `mxgpu ->
       debug "Skipping, vGPU %s implementation for VM %s is not Nvidia" (Ref.string_of vgpu) (Ref.string_of vm)
     | `nvidia ->
-      let local_pgpu_address = 
+      let local_pgpu_address =
         Db.VGPU.get_resident_on ~__context ~self:vgpu
         |> (fun self -> Db.PGPU.get_PCI ~__context ~self)
         |> (fun self -> Db.PCI.get_pci_id ~__context ~self)
       in
-      let compatibility = 
+      let compatibility =
         try
-          Gpumon_client.Client.Nvidia.get_pgpu_vm_compatibility dbg 
+          Gpumon_client.Client.Nvidia.get_pgpu_vm_compatibility dbg
             local_pgpu_address vm_domid pgpu_metadata
         with
-        | Gpumon_interface.NvmlInterfaceNotAvailable ->
+        | Gpumon_interface.(Gpumon_error NvmlInterfaceNotAvailable) ->
           let host = Db.VM.get_resident_on ~__context ~self:vm in
           raise Api_errors.(Server_error (nvidia_tools_error, [Ref.string_of host]))
         | err -> raise Api_errors.(Server_error (internal_error, [Printexc.to_string err]))
       in
-      let open Gpumon_interface in
       match compatibility with
-      | Compatible -> 
+      | Gpumon_interface.Compatible ->
         info "VM %s Nvidia vGPU is compatible with the destination pGPU on host %s"
           (Ref.string_of vm) (Ref.string_of dest_host)
-      | Incompatible reasons ->
+      | Gpumon_interface.(Incompatible reasons) ->
         raise Api_errors.(Server_error (
             vgpu_destination_incompatible,
             [ String.concat ", " (List.map reason_to_string reasons)
@@ -138,7 +137,7 @@ module Nvidia = struct
    * compatible according to their abstract compatibility metadata. This
    * code can run on any host. If no vGPU or pGPU metadata is
    * available, compatibility is assumed.
-   *)
+  *)
   let vgpu_pgpu_are_compatible ~__context ~vgpu ~pgpu =
     let dbg        = Context.string_of_task __context in
     let localhost  = Helpers.get_localhost ~__context in
@@ -151,7 +150,7 @@ module Nvidia = struct
         |> Stdext.Base64.decode in
       let vgpu_metadata () =
         Db.VGPU.get_compatibility_metadata ~__context ~self:vgpu
-        |> fun md -> [List.assoc key md]
+        |> (fun md -> [List.assoc key md])
         |> List.map Stdext.Base64.decode in
       match
         Gpumon_client.Client.Nvidia.get_pgpu_vgpu_compatibility
@@ -177,7 +176,7 @@ module Nvidia = struct
           (reasons |> List.map reason_to_string |> String.concat "; ");
         false
       (* errors *)
-      | exception Gpumon_interface.NvmlInterfaceNotAvailable ->
+      | exception Gpumon_interface.(Gpumon_error NvmlInterfaceNotAvailable) ->
         raise Api_errors.(Server_error (nvidia_tools_error, [localhost']))
       | exception err ->
         raise Api_errors.(Server_error
@@ -211,9 +210,9 @@ let clear_vgpu_metadata ~__context ~vm =
   Db.VM.get_VGPUs ~__context ~self:vm
   |> List.filter (fun vgpu -> Db.is_valid_ref __context vgpu)
   |> List.iter (fun vgpu ->
-    Db.VGPU.get_compatibility_metadata ~__context ~self:vgpu
-    |> List.filter (fun (k,_) -> k <> Nvidia.key)
-    |> fun value ->
-        Db.VGPU.set_compatibility_metadata ~__context ~self:vgpu ~value)
+      Db.VGPU.get_compatibility_metadata ~__context ~self:vgpu
+      |> List.filter (fun (k,_) -> k <> Nvidia.key)
+      |> fun value ->
+      Db.VGPU.set_compatibility_metadata ~__context ~self:vgpu ~value)
 
 
diff --git a/ocaml/xapi/xapi_pgpu.ml b/ocaml/xapi/xapi_pgpu.ml
@@ -31,17 +31,17 @@ let calculate_max_capacities ~__context ~pCI ~size ~supported_VGPU_types =
 
 let fetch_compatibility_metadata ~__context ~pgpu_pci =
   if Db.PCI.get_vendor_id ~__context ~self:pgpu_pci =
-    Xapi_pci.id_of_int Xapi_vgpu_type.Nvidia.vendor_id
+     Xapi_pci.id_of_int Xapi_vgpu_type.Nvidia.vendor_id
   then (
-     let dbg = Context.string_of_task __context in
-     let pgpu_pci_address = Db.PCI.get_pci_id ~__context ~self:pgpu_pci in
-     Xapi_gpumon.Nvidia.get_pgpu_compatibility_metadata ~dbg ~pgpu_pci_address
+    let dbg = Context.string_of_task __context in
+    let pgpu_pci_address = Db.PCI.get_pci_id ~__context ~self:pgpu_pci in
+    Xapi_gpumon.Nvidia.get_pgpu_compatibility_metadata ~dbg ~pgpu_pci_address
   ) else []
 
 let maybe_fetch_compatibility_metadata ~__context ~pgpu_pci =
   try fetch_compatibility_metadata ~__context ~pgpu_pci
   with
-  | Gpumon_interface.NvmlInterfaceNotAvailable -> []
+  | Gpumon_interface.(Gpumon_error NvmlInterfaceNotAvailable) -> []
   | err ->
     debug "fetch_compatibility_metadata for pgpu_pci:%s failed with %s"
       (Ref.string_of pgpu_pci) (Printexc.to_string err);
@@ -53,12 +53,12 @@ let populate_compatibility_metadata ~__context ~pgpu ~pgpu_pci =
     let value = fetch_compatibility_metadata ~__context ~pgpu_pci in
     Db.PGPU.set_compatibility_metadata ~__context ~self:pgpu ~value
   with
-  | Gpumon_interface.NvmlInterfaceNotAvailable ->
-      info "%s: can't get compat data for pgpu_pci:%s, keeping existing data"
-        this (Ref.string_of pgpu_pci)
+  | Gpumon_interface.(Gpumon_error NvmlInterfaceNotAvailable) ->
+    info "%s: can't get compat data for pgpu_pci:%s, keeping existing data"
+      this (Ref.string_of pgpu_pci)
   | err ->
-      debug "%s: obtaining compat data for pgpu_pci:%s failed with %s"
-        this (Ref.string_of pgpu_pci) (Printexc.to_string err)
+    debug "%s: obtaining compat data for pgpu_pci:%s failed with %s"
+      this (Ref.string_of pgpu_pci) (Printexc.to_string err)
 
 let create ~__context ~pCI ~gPU_group ~host ~other_config
     ~supported_VGPU_types ~size ~dom0_access