Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent race between GC relink and activate #517

Merged
merged 2 commits into from
Sep 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions drivers/blktap2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1435,17 +1435,25 @@ def _add_tag(self, vdi_uuid, writable):
term_output=False, writable=writable):
raise util.SMException("VDI %s not detached cleanly" % vdi_uuid)
sm_config = self._session.xenapi.VDI.get_sm_config(vdi_ref)
if 'relinking' in sm_config:
util.SMlog("Relinking key found, back-off and retry" % sm_config)
return False
if sm_config.has_key('paused'):
util.SMlog("Paused or host_ref key found [%s]" % sm_config)
return False
self._session.xenapi.VDI.add_to_sm_config(
vdi_ref, 'activating', 'True')
host_key = "host_%s" % host_ref
assert not sm_config.has_key(host_key)
self._session.xenapi.VDI.add_to_sm_config(vdi_ref, host_key,
attach_mode)
sm_config = self._session.xenapi.VDI.get_sm_config(vdi_ref)
if sm_config.has_key('paused'):
util.SMlog("Found paused key, aborting")
if sm_config.has_key('paused') or sm_config.has_key('relinking'):
util.SMlog("Found %s key, aborting" % (
'paused' if sm_config.has_key('paused') else 'relinking'))
self._session.xenapi.VDI.remove_from_sm_config(vdi_ref, host_key)
self._session.xenapi.VDI.remove_from_sm_config(
vdi_ref, 'activating')
return False
util.SMlog("Activate lock succeeded")
return True
Expand Down Expand Up @@ -1649,6 +1657,10 @@ def _activate_locked(self, sr_uuid, vdi_uuid, options):
util.SMlog('failed to remove tag: %s' % e)
break
raise
finally:
vdi_ref = self._session.xenapi.VDI.get_by_uuid(vdi_uuid)
self._session.xenapi.VDI.remove_from_sm_config(
vdi_ref, 'activating')

# Link result to backend/
self.BackendLink.from_uuid(sr_uuid, vdi_uuid).mklink(dev_path)
Expand Down
38 changes: 38 additions & 0 deletions drivers/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ class VDI:
DB_VDI_TYPE = "vdi_type"
DB_VHD_BLOCKS = "vhd-blocks"
DB_VDI_PAUSED = "paused"
DB_VDI_RELINKING = "relinking"
DB_VDI_ACTIVATING = "activating"
DB_GC = "gc"
DB_COALESCE = "coalesce"
DB_LEAFCLSC = "leaf-coalesce" # config key
Expand All @@ -470,6 +472,8 @@ class VDI:
DB_VDI_TYPE: XAPI.CONFIG_SM,
DB_VHD_BLOCKS: XAPI.CONFIG_SM,
DB_VDI_PAUSED: XAPI.CONFIG_SM,
DB_VDI_RELINKING: XAPI.CONFIG_SM,
DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
DB_GC: XAPI.CONFIG_OTHER,
DB_COALESCE: XAPI.CONFIG_OTHER,
DB_LEAFCLSC: XAPI.CONFIG_OTHER,
Expand Down Expand Up @@ -894,8 +898,41 @@ def _reload(self):

# only leaves can be attached
if len(self.children) == 0:
try:
self.delConfig(VDI.DB_VDI_RELINKING)
except XenAPI.Failure, e:
if not util.isInvalidVDI(e):
raise
self.refresh()

def _tagChildrenForRelink(self):
if len(self.children) == 0:
retries = 0
try:
while retries < 10:
retries += 1
if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
Util.log("VDI %s is activating, wait to relink" %
self.uuid)
else:
self.setConfig(VDI.DB_VDI_RELINKING, "True")

if self.getConfig(VDI.DB_VDI_ACTIVATING):
self.delConfig(VDI.DB_VDI_RELINKING)
Util.log("VDI %s started activating while tagging" %
self.uuid)
else:
return
time.sleep(1)

raise util.SMException("Failed to tag vdi %s for relink" % self)
except XenAPI.Failure, e:
if not util.isInvalidVDI(e):
raise

for child in self.children:
child._tagChildrenForRelink()

def _loadInfoParent(self):
ret = vhdutil.getParent(self.path, lvhdutil.extractUuid)
if ret:
Expand Down Expand Up @@ -1822,6 +1859,7 @@ def _coalesce(self, vdi):

self.lock()
try:
vdi.parent._tagChildrenForRelink()
self.scan()
vdi._relinkSkip()
finally:
Expand Down
88 changes: 86 additions & 2 deletions tests/test_blktap2.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,47 @@ def test_activate(self, mock_tapdisk, mock_nbd_link, mock_backend,
mock_this_host.return_value = str(uuid.uuid4())

self.mock_session.xenapi.VDI.get_sm_config.return_value = {}
self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1'
self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1'

self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {})

self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls(
[mock.call('vref1', 'activating', 'True'),
mock.call('vref1', 'host_href1', "RW")],
any_order=True)
self.mock_session.xenapi.VDI.remove_from_sm_config.assert_has_calls(
[mock.call('vref1', 'activating')],
any_order=True)

@mock.patch('blktap2.time.sleep', autospec=True)
@mock.patch('blktap2.util.get_this_host', autospec=True)
@mock.patch('blktap2.VDI._attach', autospec=True)
@mock.patch('blktap2.VDI.PhyLink', autospec=True)
@mock.patch('blktap2.VDI.BackendLink', autospec=True)
@mock.patch('blktap2.VDI.NBDLink', autospec=True)
@mock.patch('blktap2.Tapdisk')
def test_activate_relink_retry(
self, mock_tapdisk, mock_nbd_link, mock_backend,
mock_phy, mock_attach,
mock_this_host, mock_sleep):
"""
Test blktap2.VDI.activate, relinking, retry 1, success
"""
mock_this_host.return_value = str(uuid.uuid4())

self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1'
self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1'
self.mock_session.xenapi.VDI.get_sm_config.side_effect = [
{'relinking': 'true'}, {}, {}]

self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {})

self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls(
[mock.call('vref1', 'activating', 'True'),
mock.call('vref1', 'host_href1', "RW")],
any_order=True)

@mock.patch('blktap2.time.sleep', autospec=True)
@mock.patch('blktap2.util.get_this_host', autospec=True)
@mock.patch('blktap2.VDI._attach', autospec=True)
Expand All @@ -177,10 +215,16 @@ def test_activate_pause_retry(
"""
mock_this_host.return_value = str(uuid.uuid4())

self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1'
self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1'
self.mock_session.xenapi.VDI.get_sm_config.side_effect = [
{'paused': 'true'}, {}, {}]

self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {})
self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls(
[mock.call('vref1', 'activating', 'True'),
mock.call('vref1', 'host_href1', "RW")],
any_order=True)

@mock.patch('blktap2.time.sleep', autospec=True)
@mock.patch('blktap2.util.get_this_host', autospec=True)
Expand All @@ -206,8 +250,48 @@ def test_activate_paused_while_tagging(

self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {})

self.mock_session.xenapi.VDI.remove_from_sm_config.assert_called_with(
'vref1', 'host_href1')
self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls(
[mock.call('vref1', 'activating', 'True'),
mock.call('vref1', 'host_href1', "RW")],
any_order=True)
self.mock_session.xenapi.VDI.remove_from_sm_config.assert_has_calls(
[mock.call('vref1', 'host_href1'),
mock.call('vref1', 'activating')],
any_order=True)

@mock.patch('blktap2.time.sleep', autospec=True)
@mock.patch('blktap2.util.get_this_host', autospec=True)
@mock.patch('blktap2.VDI._attach', autospec=True)
@mock.patch('blktap2.VDI.PhyLink', autospec=True)
@mock.patch('blktap2.VDI.BackendLink', autospec=True)
@mock.patch('blktap2.VDI.NBDLink', autospec=True)
@mock.patch('blktap2.Tapdisk')
def test_activate_relink_while_tagging(
self, mock_tapdisk, mock_nbd_link, mock_backend,
mock_phy, mock_attach,
mock_this_host, mock_sleep):
"""
Test blktap2.VDI.activate, relinking, while tagging, retry 1, success
"""
host_uuid = str(uuid.uuid4())
mock_this_host.return_value = host_uuid

self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1'
self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1'
self.mock_session.xenapi.VDI.get_sm_config.side_effect = [
{}, {'relinking': 'true'}, {}, {}]

self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {})

self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls(
[mock.call('vref1', 'activating', 'True'),
mock.call('vref1', 'host_href1', "RW")],
any_order=True)
self.mock_session.xenapi.VDI.remove_from_sm_config.assert_has_calls(
[mock.call('vref1', 'host_href1'),
mock.call('vref1', 'activating')],
any_order=True)


class TestTapCtl(unittest.TestCase):

Expand Down
139 changes: 119 additions & 20 deletions tests/test_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,30 +1357,14 @@ def test_leafCoalesceTracker(self):
def runAbortable(self, func, ret, ns, abortTest, pollInterval, timeOut):
return func()

@mock.patch('cleanup.util', autospec=True)
@mock.patch('cleanup.vhdutil', autospec=True)
@mock.patch('cleanup.journaler.Journaler', autospec=True)
@mock.patch('cleanup.Util.runAbortable')
def test_coalesce_success(
self, mock_abortable, mock_journaler, mock_vhdutil, mock_util):
"""
Non-leaf coalesce
"""
mock_abortable.side_effect = self.runAbortable

sr_uuid = uuid4()
sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid))
sr.journaler = mock_journaler

mock_ipc_flag = mock.MagicMock(spec=ipc.IPCFlag)
print('IPC = %s' % (mock_ipc_flag))
self.mock_IPCFlag.return_value = mock_ipc_flag
mock_ipc_flag.test.return_value = None
def add_vdis_for_coalesce(self, sr):
vdis = {}

parent_uuid = str(uuid4())
parent = cleanup.VDI(sr, parent_uuid, False)
parent.path = 'dummy-path'
sr.vdis[parent_uuid] = parent
vdis['parent'] = parent

vdi_uuid = str(uuid4())
vdi = cleanup.VDI(sr, vdi_uuid, False)
Expand All @@ -1389,20 +1373,135 @@ def test_coalesce_success(
parent.children.append(vdi)

sr.vdis[vdi_uuid] = vdi
vdis['vdi'] = vdi

child_vdi_uuid = str(uuid4())
child_vdi = cleanup.VDI(sr, child_vdi_uuid, False)
child_vdi.path = 'dummy-child'
vdi.children.append(child_vdi)
sr.vdis[child_vdi_uuid] = child_vdi
vdis['child'] = child_vdi

return vdis

@mock.patch('cleanup.util', autospec=True)
@mock.patch('cleanup.vhdutil', autospec=True)
@mock.patch('cleanup.journaler.Journaler', autospec=True)
@mock.patch('cleanup.Util.runAbortable')
def test_coalesce_success(
self, mock_abortable, mock_journaler, mock_vhdutil, mock_util):
"""
Non-leaf coalesce
"""
self.xapi_mock.getConfigVDI.return_value = {}

mock_abortable.side_effect = self.runAbortable

sr_uuid = uuid4()
sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid))
sr.journaler = mock_journaler

mock_ipc_flag = mock.MagicMock(spec=ipc.IPCFlag)
print('IPC = %s' % (mock_ipc_flag))
self.mock_IPCFlag.return_value = mock_ipc_flag
mock_ipc_flag.test.return_value = None

vdis = self.add_vdis_for_coalesce(sr)
mock_journaler.get.return_value = None

res = sr.coalesce(vdi, False)
vdi_uuid = vdis['vdi'].uuid

res = sr.coalesce(vdis['vdi'], False)

mock_journaler.create.assert_has_calls(
[mock.call('coalesce', vdi_uuid, '1'),
mock.call('relink', vdi_uuid, '1')])
mock_journaler.remove.assert_has_calls(
[mock.call('coalesce', vdi_uuid),
mock.call('relink', vdi_uuid)])

self.xapi_mock.getConfigVDI.assert_has_calls(
[mock.call(vdis['child'], 'activating')])

self.xapi_mock.addToConfigVDI.assert_has_calls(
[mock.call(vdis['child'], 'relinking', 'True')])

# Remove twice as set does a remove first and then for completion
self.xapi_mock.removeFromConfigVDI.assert_has_calls(
[mock.call(vdis['child'], 'relinking'),
mock.call(vdis['child'], 'vhd-parent'),
mock.call(vdis['child'], 'relinking')])

def test_tag_children_for_relink_activation(self):
"""
Cleanup: tag for relink, activation races
"""
self.xapi_mock.getConfigVDI.side_effect = [
{'activating': 'True'},
{},
{}
]

sr_uuid = uuid4()
sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid))

vdis = self.add_vdis_for_coalesce(sr)

vdis['parent']._tagChildrenForRelink()

self.xapi_mock.getConfigVDI.assert_has_calls(
[mock.call(vdis['child'], 'activating'),
mock.call(vdis['child'], 'activating'),
mock.call(vdis['child'], 'activating')])
self.xapi_mock.addToConfigVDI.assert_has_calls(
[mock.call(vdis['child'], 'relinking', 'True')])
self.assertEqual(1, self.xapi_mock.removeFromConfigVDI.call_count)

def test_tag_children_for_relink_activation_second_phase(self):
"""
Cleanup: tag for relink, set and then activation
"""
self.xapi_mock.getConfigVDI.side_effect = [
{},
{'activating': 'True'},
{},
{}
]

sr_uuid = uuid4()
sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid))

vdis = self.add_vdis_for_coalesce(sr)

vdis['parent']._tagChildrenForRelink()

self.xapi_mock.getConfigVDI.assert_has_calls(
[mock.call(vdis['child'], 'activating'),
mock.call(vdis['child'], 'activating'),
mock.call(vdis['child'], 'activating'),
mock.call(vdis['child'], 'activating')])
self.xapi_mock.addToConfigVDI.assert_has_calls(
[mock.call(vdis['child'], 'relinking', 'True'),
mock.call(vdis['child'], 'relinking', 'True')])
# Remove called 3 times, twice from set, once on failure
self.assertEqual(3, self.xapi_mock.removeFromConfigVDI.call_count)

@mock.patch('cleanup.time.sleep', autospec=True)
def test_tag_children_for_relink_blocked(self, mock_sleep):
"""
Cleanup: tag for relink, blocked - exception
"""

self.xapi_mock.getConfigVDI.return_value = {'activating': 'True'}

sr_uuid = uuid4()
sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid))

vdis = self.add_vdis_for_coalesce(sr)

with self.assertRaises(util.SMException) as sme:
vdis['parent']._tagChildrenForRelink()

self.assertIn('Failed to tag vdi', sme.exception.message)

self.assertGreater(mock_sleep.call_count, 5)