Commit d1cdd8d

Merge pull request #533 from MarkSymsCtx/CA-350871
Optimise locking in LVHDSR
MarkSymsCtx committed Jan 25, 2021
2 parents d4c978d + bc31814 commit d1cdd8d
Showing 4 changed files with 83 additions and 119 deletions.
101 changes: 57 additions & 44 deletions drivers/LVHDSR.py
@@ -29,6 +29,7 @@
import scsiutil
import os
import sys
import time
import errno
import xs_errors
import cleanup
@@ -80,6 +81,8 @@
"sr_update", "vdi_create", "vdi_delete", "vdi_resize", "vdi_snapshot",
"vdi_clone"]

# Log if snapshot pauses VM for more than this many seconds
LONG_SNAPTIME = 60

class LVHDSR(SR.SR):
DRIVER_TYPE = 'lvhd'
@@ -1671,6 +1674,7 @@ def _do_snapshot(self, sr_uuid, vdi_uuid, snapType,
else:
consistency_state = None

pause_time = time.time()
if not blktap2.VDI.tap_pause(self.session, sr_uuid, vdi_uuid):
raise util.SMException("failed to pause VDI %s" % vdi_uuid)

@@ -1686,6 +1690,10 @@
'%s (error ignored)' % e2)
raise
blktap2.VDI.tap_unpause(self.session, sr_uuid, vdi_uuid, secondary)
unpause_time = time.time()
if (unpause_time - pause_time) > LONG_SNAPTIME:
util.SMlog('WARNING: snapshot paused VM for %s seconds' %
(unpause_time - pause_time))
return snapResult

def _snapshot(self, snapType, cloneOp=False, cbtlog=None, cbt_consistency=None):
@@ -1765,54 +1773,57 @@ def _snapshot(self, snapType, cloneOp=False, cbtlog=None, cbt_consistency=None):
if snapType == VDI.SNAPSHOT_DOUBLE:
clonUuid = util.gen_uuid()
jval = "%s_%s" % (baseUuid, clonUuid)
self.sr.journaler.create(self.JRN_CLONE, origUuid, jval)
with lvutil.LvmLockContext():
# This makes multiple LVM calls so take the lock early
self.sr.journaler.create(self.JRN_CLONE, origUuid, jval)
util.fistpoint.activate("LVHDRT_clone_vdi_after_create_journal", self.sr.uuid)

try:
# self becomes the "base vdi"
origOldLV = self.lvname
baseLV = lvhdutil.LV_PREFIX[self.vdi_type] + baseUuid
self.sr.lvmCache.rename(self.lvname, baseLV)
self.sr.lvActivator.replace(self.uuid, baseUuid, baseLV, False)
RefCounter.set(baseUuid, 1, 0, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
self.uuid = baseUuid
self.lvname = baseLV
self.path = os.path.join(self.sr.path, baseLV)
self.label = "base copy"
self.read_only = True
self.location = self.uuid
self.managed = False

# shrink the base copy to the minimum - we do it before creating
# the snapshot volumes to avoid requiring double the space
if self.vdi_type == vhdutil.VDI_TYPE_VHD:
lvhdutil.deflate(self.sr.lvmCache, self.lvname, lvSizeBase)
self.utilisation = lvSizeBase
util.fistpoint.activate("LVHDRT_clone_vdi_after_shrink_parent", self.sr.uuid)

snapVDI = self._createSnap(origUuid, lvSizeOrig, False)
util.fistpoint.activate("LVHDRT_clone_vdi_after_first_snap", self.sr.uuid)
snapVDI2 = None
if snapType == VDI.SNAPSHOT_DOUBLE:
snapVDI2 = self._createSnap(clonUuid, lvSizeClon, True)
# If we have CBT enabled on the VDI,
# set CBT status for the new snapshot disk
if cbtlog:
snapVDI2.cbt_enabled = True
util.fistpoint.activate("LVHDRT_clone_vdi_after_second_snap", self.sr.uuid)


# note: it is important to mark the parent hidden only AFTER the
# new VHD children have been created, which are referencing it;
# otherwise we would introduce a race with GC that could reclaim
# the parent before we snapshot it
if self.vdi_type == vhdutil.VDI_TYPE_RAW:
self.sr.lvmCache.setHidden(self.lvname)
else:
vhdutil.setHidden(self.path)
util.fistpoint.activate("LVHDRT_clone_vdi_after_parent_hidden", self.sr.uuid)
with lvutil.LvmLockContext():
# self becomes the "base vdi"
origOldLV = self.lvname
baseLV = lvhdutil.LV_PREFIX[self.vdi_type] + baseUuid
self.sr.lvmCache.rename(self.lvname, baseLV)
self.sr.lvActivator.replace(self.uuid, baseUuid, baseLV, False)
RefCounter.set(baseUuid, 1, 0, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
self.uuid = baseUuid
self.lvname = baseLV
self.path = os.path.join(self.sr.path, baseLV)
self.label = "base copy"
self.read_only = True
self.location = self.uuid
self.managed = False

# shrink the base copy to the minimum - we do it before creating
# the snapshot volumes to avoid requiring double the space
if self.vdi_type == vhdutil.VDI_TYPE_VHD:
lvhdutil.deflate(self.sr.lvmCache, self.lvname, lvSizeBase)
self.utilisation = lvSizeBase
util.fistpoint.activate("LVHDRT_clone_vdi_after_shrink_parent", self.sr.uuid)

snapVDI = self._createSnap(origUuid, lvSizeOrig, False)
util.fistpoint.activate("LVHDRT_clone_vdi_after_first_snap", self.sr.uuid)
snapVDI2 = None
if snapType == VDI.SNAPSHOT_DOUBLE:
snapVDI2 = self._createSnap(clonUuid, lvSizeClon, True)
# If we have CBT enabled on the VDI,
# set CBT status for the new snapshot disk
if cbtlog:
snapVDI2.cbt_enabled = True
util.fistpoint.activate("LVHDRT_clone_vdi_after_second_snap", self.sr.uuid)

# note: it is important to mark the parent hidden only AFTER the
# new VHD children have been created, which are referencing it;
# otherwise we would introduce a race with GC that could reclaim
# the parent before we snapshot it
if self.vdi_type == vhdutil.VDI_TYPE_RAW:
self.sr.lvmCache.setHidden(self.lvname)
else:
vhdutil.setHidden(self.path)
util.fistpoint.activate("LVHDRT_clone_vdi_after_parent_hidden", self.sr.uuid)

# set the base copy to ReadOnly
# Do this outside the LvmLockContext to avoid deadlock
self.sr.lvmCache.setReadonly(self.lvname, True)
util.fistpoint.activate("LVHDRT_clone_vdi_after_parent_ro", self.sr.uuid)

@@ -1839,7 +1850,9 @@ def _snapshot(self, snapType, cloneOp=False, cbtlog=None, cbt_consistency=None):
self._failClone(origUuid, jval, str(e))
util.fistpoint.activate("LVHDRT_clone_vdi_before_remove_journal", self.sr.uuid)

self.sr.journaler.remove(self.JRN_CLONE, origUuid)
with lvutil.LvmLockContext():
# This makes multiple LVM calls so take the lock early
self.sr.journaler.remove(self.JRN_CLONE, origUuid)

return self._finishSnapshot(snapVDI, snapVDI2, hostRefs, cloneOp, snapType)

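For illustration, the pause-duration guard added to _do_snapshot reduces to the pattern below. This is a minimal sketch, not the driver code: do_pause, do_work and do_unpause are hypothetical stand-ins for blktap2.VDI.tap_pause, the snapshot body and tap_unpause, and the finally block simplifies the driver's error handling, which only unpauses on the success path.

    import time

    LONG_SNAPTIME = 60  # warn if the VM stays paused longer than this (seconds)

    def timed_snapshot(do_pause, do_work, do_unpause, log=print):
        # Record the wall-clock span between pause and unpause and warn
        # when it exceeds the threshold, mirroring the diff above.
        pause_time = time.time()
        if not do_pause():
            raise RuntimeError("failed to pause VDI")
        try:
            result = do_work()
        finally:
            do_unpause()
        unpause_time = time.time()
        if (unpause_time - pause_time) > LONG_SNAPTIME:
            log('WARNING: snapshot paused VM for %s seconds'
                % (unpause_time - pause_time))
        return result
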
1 change: 0 additions & 1 deletion drivers/lvmcache.py
@@ -210,7 +210,6 @@ def deactivateNoRefcount(self, lvName):
self.lvs[lvName].active = False
else:
util.SMlog("LVMCache.deactivateNoRefcount: no LV %s" % lvName)
lvutil._lvmBugCleanup(path)

@lazyInit
def setHidden(self, lvName, hidden=True):
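The lvutil.py hunks below drop the matching _lvmBugCleanup definition and its call sites, so what remains in remove() and deactivateNoRefcount() is a plain retry-then-raise loop. A minimal sketch of that shape, reconstructed from the context lines visible in the next file (LVM_FAIL_RETRIES, _deactivate and util.SMlog are lvutil names, not redefined here):

    def deactivateNoRefcount(path):
        # Retry the deactivation a bounded number of times, then give up;
        # with _lvmBugCleanup removed there is no device-mapper fixup
        # between attempts.
        for i in range(LVM_FAIL_RETRIES):
            try:
                _deactivate(path)
                return
            except util.CommandException:
                if i >= LVM_FAIL_RETRIES - 1:
                    raise
                util.SMlog("*** lvchange -an failed on attempt #%d" % i)
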
97 changes: 25 additions & 72 deletions drivers/lvutil.py
@@ -73,6 +73,8 @@

LVM_COMMANDS = VG_COMMANDS.union(PV_COMMANDS, LV_COMMANDS, DM_COMMANDS)

LVM_LOCK = 'lvm'


def extract_vgname(str_in):
"""Search for and return a VG name
@@ -110,15 +112,30 @@ def extract_vgname(str_in):

return None


def get_lvm_lock():
class LvmLockContext(object):
"""
Open and acquire a system wide lock to wrap LVM calls
:return: the created lock
Context manager around the LVM lock.
To allow higher-level operations, e.g. VDI snapshot, to pre-emptively
acquire the lock, encapsulating a set of calls and avoiding having to
reacquire the lock for each LVM call.
"""
new_lock = lock.Lock('lvm')
new_lock.acquire()
return new_lock

def __init__(self, cmd=None):
self.lock = lock.Lock(LVM_LOCK)
self.cmd = cmd
self.locked = False

def __enter__(self):
if self.cmd and '--readonly' in self.cmd:
return

self.lock.acquire()
self.locked = True

def __exit__(self, exc_type, value, traceback):
if self.locked:
self.lock.release()


def cmd_lvm(cmd, pread_func=util.pread2, *args):
@@ -163,14 +180,10 @@ def cmd_lvm(cmd, pread_func=util.pread2, *args):
util.SMlog("CMD_LVM: Not all lvm arguments are of type 'str'")
return None

lvm_lock = get_lvm_lock()

try:
with LvmLockContext(cmd):
start_time = time.time()
stdout = pread_func([os.path.join(LVM_BIN, lvm_cmd)] + lvm_args, * args)
end_time = time.time()
finally:
lvm_lock.release()

if (end_time - start_time > MAX_OPERATION_DURATION):
util.SMlog("***** Long LVM call of '%s' took %s" % (lvm_cmd, (end_time - start_time)))
@@ -560,7 +573,6 @@ def remove(path, config_param=None):
if i >= LVM_FAIL_RETRIES - 1:
raise
util.SMlog("*** lvremove failed on attempt #%d" % i)
_lvmBugCleanup(path)


def _remove(path, config_param=None):
@@ -635,7 +647,6 @@ def deactivateNoRefcount(path):
if i >= LVM_FAIL_RETRIES - 1:
raise
util.SMlog("*** lvchange -an failed on attempt #%d" % i)
_lvmBugCleanup(path)


def _deactivate(path):
@@ -681,64 +692,6 @@ def _checkActive(path):
return False


def _lvmBugCleanup(path):
# the device should not exist at this point. If it does, this was an LVM
# bug, and we manually clean up after LVM here
mapperDevice = path[5:].replace("-", "--").replace("/", "-")
mapperPath = "/dev/mapper/" + mapperDevice

nodeExists = False
cmd_st = [CMD_DMSETUP, "status", mapperDevice]
cmd_rm = [CMD_DMSETUP, "remove", mapperDevice]
cmd_rf = [CMD_DMSETUP, "remove", mapperDevice, "--force"]

try:
util.pread(cmd_st, expect_rc=1)
except util.CommandException as e:
if e.code == 0:
nodeExists = True

if not util.pathexists(mapperPath) and not nodeExists:
return

util.SMlog("_lvmBugCleanup: seeing dm file %s" % mapperPath)

# destroy the dm device
if nodeExists:
util.SMlog("_lvmBugCleanup: removing dm device %s" % mapperDevice)
for i in range(LVM_FAIL_RETRIES):
try:
util.pread2(cmd_rm)
break
except util.CommandException as e:
if i < LVM_FAIL_RETRIES - 1:
util.SMlog("Failed on try %d, retrying" % i)
try:
util.pread(cmd_st, expect_rc=1)
util.SMlog("_lvmBugCleanup: dm device {}"
" removed".format(mapperDevice)
)
break
except:
cmd_rm = cmd_rf
time.sleep(1)
else:
# make sure the symlink is still there for consistency
if not os.path.lexists(path):
os.symlink(mapperPath, path)
util.SMlog("_lvmBugCleanup: restored symlink %s" % path)
raise e

if util.pathexists(mapperPath):
os.unlink(mapperPath)
util.SMlog("_lvmBugCleanup: deleted devmapper file %s" % mapperPath)

# delete the symlink
if os.path.lexists(path):
os.unlink(path)
util.SMlog("_lvmBugCleanup: deleted symlink %s" % path)


# mdpath is of format /dev/VG-SR-UUID/MGT
# or in other words /VG_LOCATION/VG_PREFIXSR-UUID/MDVOLUME_NAME
def ensurePathExists(mdpath):
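Taken together, the point of LvmLockContext is that a caller can hold the 'lvm' lock across a whole batch of commands instead of paying one acquire/release per call: cmd_lvm still constructs LvmLockContext(cmd) internally, and commands carrying '--readonly' skip the lock in __enter__. A hedged usage sketch, assuming lock.Lock is re-entrant within a process (which is what lets the inner context nest under the outer one); the volume path is illustrative and CMD_LVCHANGE is one of lvutil's command constants:

    from lvutil import LvmLockContext, cmd_lvm, CMD_LVCHANGE

    lv_path = '/dev/VG_XenStorage-example/volume'  # illustrative path

    with LvmLockContext():
        # Both calls run under the single outer acquisition; the inner
        # LvmLockContext(cmd) built by cmd_lvm nests without re-blocking.
        cmd_lvm([CMD_LVCHANGE, '-ay', lv_path])
        cmd_lvm([CMD_LVCHANGE, '-an', lv_path])

This is the same pattern the LVHDSR.py hunks use around journaler.create and journaler.remove, each of which issues several LVM calls under one lock acquisition.
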
3 changes: 1 addition & 2 deletions tests/test_lvutil.py
@@ -107,9 +107,8 @@ def test_remove_removes_volume(self, lvsystem):

self.assertEquals([], lvsystem.get_logical_volumes_with_name('volume'))

@mock.patch('lvutil._lvmBugCleanup', autospec=True)
@mock.patch('util.pread', autospec=True)
def test_remove_additional_config_param(self, mock_pread, _bugCleanup):
def test_remove_additional_config_param(self, mock_pread):
lvutil.remove('VG_XenStorage-b3b18d06-b2ba-5b67-f098-3cdd5087a2a7/volume', config_param="blah")
mock_pread.assert_called_once_with(
[os.path.join(lvutil.LVM_BIN, lvutil.CMD_LVREMOVE)]
