Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 259 lines (235 sloc) 7.88 KB
#!/usr/bin/env python
import XenAPI
import getopt
import sys
import os
import commands
import random
import time
import httplib
import urllib
def check(svm, ip):
    """
    Check that the pool is in the same condition as before the test.

    What the session ``svm`` reports is compared against the module-level
    snapshot taken before the failure was injected:

    * ``masterref`` -- host reference of the pool master;
    * ``hosts``     -- dict mapping host address -> host reference;
    * ``vmrunning`` -- dict mapping VM reference -> VM record, holding the
      VMs that were 'Running' with domid 0.

    Every discrepancy is printed, prefixed with ``ip``.  When nothing
    differs a single "everything is consistent" line is printed instead.

    Arguments:
        svm: logged-in XenAPI session to query.
        ip:  address of the queried host; only used to label the messages.

    Returns:
        True if no discrepancy was found, False otherwise.
    """
    consistent = True

    # Pool master as seen now (the first pool record returned is used).
    pools = svm.xenapi.pool.get_all_records()
    masterref2 = list(pools.values())[0]['master']
    if masterref2 != masterref:
        print("From " + ip + " point of view the pool master is " + svm.xenapi.host.get_record(masterref2)["address"])
        consistent = False

    # Hosts.  `hosts` is keyed by address and stores host references, while
    # `hosts2` is keyed by host reference: membership must therefore be
    # tested with the stored reference, not with the address.
    hosts2 = svm.xenapi.host.get_all_records()
    if len(hosts) != len(hosts2):
        print("From " + ip + " point of view the number of hosts is changed.")
        consistent = False
    for address, hostref in hosts.items():
        if hostref not in hosts2:
            print("From " + ip + " point of view " + address + " is not present any more.")
            consistent = False

    # Running VMs, selected with the same filter used for the snapshot.
    # NOTE(review): only domid 0 is kept -- presumably the hosts' control
    # domains; confirm this is intended rather than "domid > 0".
    vmrunning2 = {}
    for vmref, record in svm.xenapi.VM.get_all_records().items():
        if record['power_state'] == 'Running' and int(record['domid']) == 0:
            vmrunning2[vmref] = record
    if len(vmrunning) != len(vmrunning2):
        print("From " + ip + " point of view some VMs have changed state.")
        consistent = False
    for vmref, record in vmrunning.items():
        if vmref not in vmrunning2:
            print("From " + ip + " point of view " + record['name_label'] + " is not online any more.")
            consistent = False

    if consistent:
        print("On %s everything is consistent." % ip)
    return consistent
def help() :
print """
Usage: hatests <options> <test>
where options can be:
-w, --wait <seconds> wait time between stopping an host and restarting it
(default 120)
where test can be:
master_hard_failure
master_soft_failure
slave_hard_failure
slave_soft_failure
master_vif_unplug
"""
###### START ######
# Command line: an optional wait time followed by exactly one test name.
secs = 120  # default number of seconds for each waiting phase
try:
    optlist, args = getopt.getopt(sys.argv[1:], "w:h", ["wait=", "help"])
except getopt.GetoptError:
    # Unknown option, or an option missing its argument: show the usage
    # rather than a traceback.
    help()
    sys.exit(1)
for o, a in optlist:
    if o in ("-w", "--wait"):
        try:
            secs = int(a)
        except ValueError:
            print("Invalid wait time: " + a)
            help()
            sys.exit(1)
    elif o in ("-h", "--help"):
        help()
        sys.exit(0)
# Exactly one positional argument (the test name) is required.
if len(args) != 1:
    help()
    sys.exit(1)
##read config file
#config = open(sys.args[1], "r")
#slave = []
#for line in config :
# type, ip = line.lstrip().split()
# if type == "master" :
# master = ip
# else :
# slave.append(ip)
#connection
# NOTE(review): the credentials are hard-coded, here and in the rest of the
# script.
s = XenAPI.Session('http://localhost')
s.login_with_password('root', 'xenroot')

#Getting all the installed and running VMs with dom-id > 0
# `slaves` collects (VM reference, IP address) pairs; the entry whose address
# matches the pool master is moved into `master` further down.
slaves = []
master = None
vmrecords = s.xenapi.VM.get_all_records()
for k, v in vmrecords.items():
    if v['power_state'] != 'Running' or int(v['domid']) <= 0:
        continue
    ip = commands.getoutput("xenstore-ls /local/domain/" + v['domid'] + " | grep ip")
    try:
        # Third whitespace-separated token of the output with its first and
        # last characters dropped -- presumably the output looks like
        # `ip = "a.b.c.d"` and this removes the quotes (TODO confirm).
        ip = ip.split()[2][1:-1]
    except IndexError:
        # grep matched nothing (or something shorter than expected).
        print("VM in dom" + v['domid'] + " doesn't have an IP address")
        continue
    slaves.append((k, ip))
#finding out which one is the master
if not slaves:
    print("No running VM with an IP address was found: cannot locate the pool.")
    sys.exit(1)
# Ask the first VM found.  If it is the master it answers directly; if it is
# a slave the login fails with HOST_IS_SLAVE, whose second detail is used as
# the master's address.
svm = XenAPI.Session("http://" + slaves[0][1])
try:
    svm.login_with_password('root', 'xenroot')
    masterref = list(svm.xenapi.pool.get_all_records().values())[0]['master']
    masterrecord = svm.xenapi.host.get_record(masterref)
    masterip = masterrecord['address']
except XenAPI.Failure:
    # sys.exc_info() instead of "except ..., inst" so that the statement
    # parses on every Python version.
    inst = sys.exc_info()[1]
    if inst.details[0] != "HOST_IS_SLAVE":
        # Any other failure carries no master address: let it surface.
        raise
    masterip = inst.details[1]
    svm = XenAPI.Session("http://" + masterip)
    svm.login_with_password('root', 'xenroot')
    masterref = list(svm.xenapi.pool.get_all_records().values())[0]['master']
# Move the master's entry out of `slaves`.  Removing while iterating is safe
# here only because the loop stops immediately afterwards.
for candidate in slaves:
    if masterip == candidate[1]:
        master = candidate
        slaves.remove(candidate)
        break
if master is None:
    print("The pool master " + masterip + " is not one of the running VMs found.")
    sys.exit(1)
print("Master ip address is " + master[1])
#getting ip -> hostref references
# Snapshot taken before any failure is injected: host address -> host
# reference, as reported by the session `svm`.
hostsrecs = svm.xenapi.host.get_all_records()
hosts = dict([(rec['address'], ref) for ref, rec in hostsrecs.items()])
#getting the VM running
# VM reference -> VM record for every VM that is 'Running' with domid 0.
# NOTE(review): domid 0 presumably selects the hosts' control domains;
# confirm this is intended rather than "domid > 0".
vmrecords = svm.xenapi.VM.get_all_records()
vmrunning = dict([(ref, rec) for ref, rec in vmrecords.items()
                  if rec['power_state'] == 'Running' and int(rec['domid']) == 0])
# What has to be undone once the first waiting phase is over:
#   bringup    -- reference of the VM that was shut down and must be restarted
#   vifbringup -- reference of the VIF that was unplugged and must be re-plugged
bringup = None
vifbringup = None
testname = sys.argv[-1]
if testname == "master_hard_failure":
    print("Shutting down the master")
    s.xenapi.VM.hard_shutdown(master[0])
    bringup = master[0]
elif testname == "master_soft_failure":
    print("Shutting down the master")
    s.xenapi.VM.clean_shutdown(master[0])
    bringup = master[0]
elif testname in ("slave_hard_failure", "slave_soft_failure"):
    if not slaves:
        print("No slave is available to be shut down.")
        sys.exit(1)
    r = random.randint(0, len(slaves) - 1)
    print("Shutting down slave " + slaves[r][1])
    # The soft variant used to be unreachable: its branch tested
    # "slave_hard_failure" a second time.
    if testname == "slave_hard_failure":
        s.xenapi.VM.hard_shutdown(slaves[r][0])
    else:
        s.xenapi.VM.clean_shutdown(slaves[r][0])
    bringup = slaves[r][0]
elif testname == "master_vif_unplug":
    print("Unplugging the first found attached VIF in the master")
    allvifs = s.xenapi.VIF.get_all_records()
    for k, v in allvifs.items():
        if v['currently_attached'] and v['VM'] == master[0]:
            vifbringup = k
            s.xenapi.VIF.unplug(vifbringup)
            break
    if vifbringup is None:
        print("No attached VIF was found in the master.")
else:
    # Do not spend two waiting phases on a mistyped test name.
    print("Unknown test: " + testname)
    help()
    sys.exit(1)
def _countdown(seconds):
    """Announce the wait, then sleep `seconds` seconds printing one dot per second."""
    print("Waiting " + str(seconds) + " seconds")
    for _ in range(seconds):
        time.sleep(1)
        sys.stdout.write(".")
        sys.stdout.flush()
    sys.stdout.write("\n")


# First phase: leave the pool alone with the injected failure.
_countdown(secs)
# Undo the failure: restart the VM that was shut down and/or re-plug the VIF.
if bringup is not None:
    print("Bringing the host up again")
    s.xenapi.VM.start(bringup, False, True)
if vifbringup is not None:
    print("Plugging the VIF back again")
    s.xenapi.VIF.plug(vifbringup)
# Second phase: wait again before logs are collected.
_countdown(secs)
def _fetch_status_log(ip, filename, wrote_msg, chunk_size=65536):
    """
    Download the system-status archive of the host at `ip` into `filename`.

    The host reference needed by the URL is looked up in the module-level
    `hosts` map (address -> host reference).  An address that is not in
    that map is reported and skipped instead of raising KeyError.

    Arguments:
        ip:         address of the host to query.
        filename:   file to (over)write with the downloaded data.
        wrote_msg:  message printed once the data has been written.
        chunk_size: number of bytes read per call.

    I/O errors are reported on stdout and not propagated.
    """
    hostref = hosts.get(ip)
    if hostref is None:
        print(ip + " is not a host of the pool: no log collected.")
        return
    try:
        # Binary mode: the payload is saved as a .tar.bz2 file.
        fileout = open(filename, "wb")
        try:
            # NOTE(review): credentials are hard-coded in the URL.
            f = urllib.urlopen("http://root:xenroot@" + ip + "/system-status?host_id=" + hostref)
            try:
                buf = f.read(chunk_size)
                if len(buf) == 0:
                    print(ip + " returned an empty log.")
                else:
                    while len(buf) > 0:
                        fileout.write(buf)
                        buf = f.read(chunk_size)
                    print(wrote_msg)
            finally:
                f.close()
        finally:
            fileout.close()
    except IOError:
        print("Unable to connect to %s: network error." % ip)


print("Collecting logs now...")
_fetch_status_log(master[1],
                  "master-" + master[1] + "-log.tar.bz2",
                  "Wrote master log to master-" + master[1] + "-log.tar.bz2")
for k, ip in slaves:
    _fetch_status_log(ip,
                      "slave-" + ip + "-log.tar.bz2",
                      "Wrote slave " + ip + " log to slave-" + ip + "-log.tar.bz2")
#checking if everything is still OK
# The master is expected to accept the login; check() then compares its view
# of the pool with the snapshot taken before the test.
print("Connecting to " + master[1] + "...")
svm = XenAPI.Session("http://" + master[1])
try:
    svm.login_with_password('root', 'xenroot')
    check(svm, master[1])
except XenAPI.Failure:
    # sys.exc_info() instead of "except ..., inst" so that the statement
    # parses on every Python version.
    inst = sys.exc_info()[1]
    if inst.details[0] == "HOST_IS_SLAVE":
        # Report the address, as every other message does, rather than the
        # VM reference.
        print(master[1] + " is not master any more")
    else:
        print("Unable to connect to %s: XenAPI failure." % master[1])
except IOError:
    print("Unable to connect to %s: network error." % master[1])

# A slave is expected to refuse the login with HOST_IS_SLAVE; a successful
# login is reported and the host is then checked like the master.
for slave in slaves:
    print("Connecting to " + slave[1] + "...")
    svm = XenAPI.Session("http://" + slave[1])
    try:
        svm.login_with_password('root', 'xenroot')
        print("Connection succeeded! Is %s still a slave?" % slave[1])
        check(svm, slave[1])
    except XenAPI.Failure:
        inst = sys.exc_info()[1]
        if inst.details[0] == "HOST_IS_SLAVE":
            print("Connection failed because %s is still a slave." % slave[1])
        else:
            print("Unable to connect to %s: XenAPI failure." % slave[1])
    except IOError:
        print("Unable to connect to %s: network error." % slave[1])