diff --git a/_oasis b/_oasis
index ed37c80..f3b06e1 100644
--- a/_oasis
+++ b/_oasis
@@ -30,7 +30,7 @@ Executable "xenvmd"
   MainIs:             xenvmd.ml
   Custom:             true
   Install:            false
-  BuildDepends:       lwt, lwt.unix, mirage-block-unix, mirage-clock-unix, devmapper, threads, lvm, cstruct, oUnit, io-page, io-page.unix, cmdliner, sexplib.syntax, xenvmidl, lvm, lvm.mapper, shared-block-ring
+  BuildDepends:       lwt, lwt.unix, mirage-block-unix, mirage-clock-unix, devmapper, threads, lvm, cstruct, oUnit, io-page, io-page.unix, stringext, cmdliner, sexplib.syntax, xenvmidl, lvm, lvm.mapper, shared-block-ring
 
 Executable "xenvm"
   ByteOpt:            -warn-error +a
diff --git a/_tags b/_tags
index 2ca2171..51125fb 100644
--- a/_tags
+++ b/_tags
@@ -1,5 +1,5 @@
 # OASIS_START
-# DO NOT EDIT (digest: ff372a3fb61a268f33926cbcbfd6c5ab)
+# DO NOT EDIT (digest: 2190fe0a8a00a6b8cc32c1b1dff4f3d1)
 # Ignore VCS directories, you can use the same kind of rule outside
 # OASIS_START/STOP if you want to exclude directories that contains
 # useless stuff for the build process
@@ -52,6 +52,7 @@ true: annot, bin_annot
 : pkg_sexplib
 : pkg_sexplib.syntax
 : pkg_shared-block-ring
+: pkg_stringext
 : pkg_threads
 : use_xenvmidl
 : pkg_cmdliner
@@ -72,6 +73,7 @@ true: annot, bin_annot
 : pkg_sexplib
 : pkg_sexplib.syntax
 : pkg_shared-block-ring
+: pkg_stringext
 : pkg_threads
 : use_xenvmidl
 : custom
diff --git a/cleanup.sh b/cleanup.sh
index ef38313..a5b6f21 100755
--- a/cleanup.sh
+++ b/cleanup.sh
@@ -22,4 +22,4 @@ if [ "$USE_MOCK" -eq "0" ]; then
   losetup -d $LOOP
 fi
 
-rm -f localJournal bigdisk *.out djstest-* dm-mock
+rm -f localJournal bigdisk *.out* djstest-* dm-mock
diff --git a/opam b/opam
index 98309a0..384ba42 100644
--- a/opam
+++ b/opam
@@ -27,4 +27,5 @@ depends: [
   "ounit"
   "oasis"
   "ocveralls"
+  "stringext"
 ]
diff --git a/setup.sh b/setup.sh
index ce6cc11..f222efa 100755
--- a/setup.sh
+++ b/setup.sh
@@ -69,14 +69,20 @@ cat test.local_allocator.conf.in | sed -r "s|@BIGDISK@|$LOOP|g" | sed -r "s|@HO
 
 sleep 30
 ./xenvm.native host-list /dev/djstest --configdir /tmp/xenvm.d $MOCK_ARG
+# Let's check that xenvmd retains its list of connected hosts over a restart
+./xenvm.native host-list /dev/djstest --configdir /tmp/xenvm.d $MOCK_ARG | sort > host-list.out
+kill `cat /tmp/xenvmd.lock`
+./xenvmd.native --config ./test.xenvmd.conf > xenvmd.log.2 &
+sleep 10
+./xenvm.native host-list /dev/djstest --configdir /tmp/xenvm.d $MOCK_ARG | sort > host-list.out2
+diff -u host-list.out host-list.out2
+
 
 # destroy hosts
 ./xenvm.native host-disconnect /dev/djstest host2 --configdir /tmp/xenvm.d $MOCK_ARG
 ./xenvm.native host-destroy /dev/djstest host2 --configdir /tmp/xenvm.d $MOCK_ARG
 ./xenvm.native host-disconnect /dev/djstest host1 --configdir /tmp/xenvm.d $MOCK_ARG
 ./xenvm.native host-destroy /dev/djstest host1 --configdir /tmp/xenvm.d $MOCK_ARG
-./xenvm.native host-list /dev/djstest --configdir /tmp/xenvm.d $MOCK_ARG
-
 #shutdown
 ./xenvm.native lvchange -an /dev/djstest/live --configdir /tmp/xenvm.d $MOCK_ARG || true
 ./xenvm.native shutdown /dev/djstest --configdir /tmp/xenvm.d $MOCK_ARG
diff --git a/xenvmd/xenvmd.ml b/xenvmd/xenvmd.ml
index e79a56e..d08be5e 100644
--- a/xenvmd/xenvmd.ml
+++ b/xenvmd/xenvmd.ml
@@ -31,6 +31,8 @@ let fatal_error_t msg =
   error "%s" msg;
   fail (Failure msg)
 
+let connected_tag = "xenvm_connected"
+
 let fatal_error msg m = m >>= function
   | `Error (`Msg x) -> fatal_error_t (msg ^ ": " ^ x)
   | `Error `Suspended -> fatal_error_t (msg ^ ": queue is suspended")
@@ -291,6 +293,11 @@
       end else begin
         match Vg_IO.find vg toLVM, Vg_IO.find vg fromLVM, Vg_IO.find vg freeLVM with
         | Some toLVM_id, Some fromLVM_id, Some freeLVM_id ->
+          (* Persist at this point that we're going to connect this host *)
+          (* All of the following logic is idempotent *)
+          write (fun vg ->
+              Lvm.Vg.add_tag vg toLVM connected_tag
+            ) >>= fun () ->
           Hashtbl.replace host_connections name Xenvm_interface.Resuming_to_LVM;
           let background_t () =
             Vg_IO.Volume.connect toLVM_id
@@ -305,7 +312,6 @@ module VolumeManager = struct
 
             ToLVM.resume toLVM_q
             >>= fun () ->
-
             Vg_IO.Volume.connect fromLVM_id
             >>= function
             | `Error _ -> fail (Failure (Printf.sprintf "Failed to open %s" fromLVM))
@@ -394,7 +400,7 @@ module VolumeManager = struct
       let to_lvm = List.assoc name !to_LVMs in
       debug "Suspending ToLVM queue for %s" name;
       ToLVM.suspend to_lvm
-      >>= fun () -> 
+      >>= fun () ->
      (* There may still be updates in the ToLVM queue *)
       Lwt_mutex.with_lock flush_m (fun () -> flush_already_locked name)
       >>= fun () ->
@@ -402,6 +408,10 @@
       to_LVMs := List.filter (fun (n, _) -> n <> name) !to_LVMs;
       from_LVMs := List.filter (fun (n, _) -> n <> name) !from_LVMs;
       free_LVs := List.filter (fun (n, _) -> n <> name) !free_LVs;
+      let toLVM = toLVM name in
+      write (fun vg ->
+          Lvm.Vg.remove_tag vg toLVM connected_tag
+        ) >>= fun () ->
       Hashtbl.remove host_connections name;
       return ()
     | x ->
@@ -455,6 +465,30 @@ module VolumeManager = struct
           else None in
         return { Xenvm_interface.name; connection_state; fromLVM; toLVM; freeExtents }
       ) !to_LVMs
+
+  let reconnect_all () =
+    read (fun vg ->
+        debug "Reconnecting";
+        Lvm.Vg.LVs.fold (fun key v acc ->
+            debug "Checking LV: %s" v.Lvm.Lv.name;
+            let name = v.Lvm.Lv.name in
+            match Stringext.split name ~on:'-' |> List.rev with
+            | "toLVM" :: host_bits ->
+              let host = String.concat "-" (List.rev host_bits) in
+              debug "This is a 'toLVM' LV";
+              (* It's a toLVM - check to see whether it has the 'connected' flag *)
+              let tags = List.map Lvm.Name.Tag.to_string v.Lvm.Lv.tags in
+              let was_connected = List.mem connected_tag tags in
+              debug "host=%s was_connected=%b" host was_connected;
+              (host,was_connected)::acc
+            | e ->
+              debug "got list: %s" (String.concat "," e);
+              acc)
+          vg.Lvm.Vg.lvs [] |> Lwt.return
+      ) >>= fun host_states ->
+    Lwt_list.iter_s (fun (host, was_connected) ->
+        if was_connected then connect host else disconnect host) host_states
+
 end
 
 let flush_all () =
@@ -740,7 +774,9 @@ let run port sock_path config =
     >>= fun () ->
     FreePool.start Xenvm_interface._journal_name
     >>= fun () ->
-
+    VolumeManager.Host.reconnect_all ()
+    >>= fun () ->
+
     let rec service_queues () =
       (* 0. Have any local allocators restarted? *)
       FreePool.resend_free_volumes config
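
For reference, reconnect_all leans entirely on the <host>-toLVM naming convention to recover host names from LV names after a restart. A minimal stdlib-only sketch of that convention (String.split_on_char standing in for Stringext.split; the helper name and example hosts are hypothetical):

(* Recover the host name from a toLVM LV name. The toLVM volume for host
   "foo-bar" is named "foo-bar-toLVM"; since host names may themselves
   contain '-', split on '-', expect "toLVM" at the head of the reversed
   list, and rejoin the remaining pieces in their original order. *)
let host_of_toLVM_lv lv_name =
  match List.rev (String.split_on_char '-' lv_name) with
  | "toLVM" :: host_bits -> Some (String.concat "-" (List.rev host_bits))
  | _ -> None

let () =
  assert (host_of_toLVM_lv "host1-toLVM" = Some "host1");
  assert (host_of_toLVM_lv "my-host-toLVM" = Some "my-host");
  assert (host_of_toLVM_lv "host1-fromLVM" = None)

This is also why the host-list output stays stable across the restart exercised in setup.sh: connectedness is recorded in the VG metadata via the xenvm_connected tag, not in xenvmd's in-memory state.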