From 9c57285ab42204509dcdfe4d600eca8c26fae292 Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Tue, 22 Sep 2020 09:38:51 +0100 Subject: [PATCH 1/2] CA-344254: write tags to leaf VDIs when relinking the chain Signed-off-by: Mark Syms --- drivers/cleanup.py | 38 ++++++++++++ tests/test_cleanup.py | 139 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 157 insertions(+), 20 deletions(-) diff --git a/drivers/cleanup.py b/drivers/cleanup.py index 9d863f28a..cf93568d1 100755 --- a/drivers/cleanup.py +++ b/drivers/cleanup.py @@ -452,6 +452,8 @@ class VDI: DB_VDI_TYPE = "vdi_type" DB_VHD_BLOCKS = "vhd-blocks" DB_VDI_PAUSED = "paused" + DB_VDI_RELINKING = "relinking" + DB_VDI_ACTIVATING = "activating" DB_GC = "gc" DB_COALESCE = "coalesce" DB_LEAFCLSC = "leaf-coalesce" # config key @@ -470,6 +472,8 @@ class VDI: DB_VDI_TYPE: XAPI.CONFIG_SM, DB_VHD_BLOCKS: XAPI.CONFIG_SM, DB_VDI_PAUSED: XAPI.CONFIG_SM, + DB_VDI_RELINKING: XAPI.CONFIG_SM, + DB_VDI_ACTIVATING: XAPI.CONFIG_SM, DB_GC: XAPI.CONFIG_OTHER, DB_COALESCE: XAPI.CONFIG_OTHER, DB_LEAFCLSC: XAPI.CONFIG_OTHER, @@ -894,8 +898,41 @@ def _reload(self): # only leaves can be attached if len(self.children) == 0: + try: + self.delConfig(VDI.DB_VDI_RELINKING) + except XenAPI.Failure, e: + if not util.isInvalidVDI(e): + raise self.refresh() + def _tagChildrenForRelink(self): + if len(self.children) == 0: + retries = 0 + try: + while retries < 10: + retries += 1 + if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: + Util.log("VDI %s is activating, wait to relink" % + self.uuid) + else: + self.setConfig(VDI.DB_VDI_RELINKING, "True") + + if self.getConfig(VDI.DB_VDI_ACTIVATING): + self.delConfig(VDI.DB_VDI_RELINKING) + Util.log("VDI %s started activating while tagging" % + self.uuid) + else: + return + time.sleep(1) + + raise util.SMException("Failed to tag vdi %s for relink" % self) + except XenAPI.Failure, e: + if not util.isInvalidVDI(e): + raise + + for child in self.children: + child._tagChildrenForRelink() + def _loadInfoParent(self): ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) if ret: @@ -1822,6 +1859,7 @@ def _coalesce(self, vdi): self.lock() try: + vdi.parent._tagChildrenForRelink() self.scan() vdi._relinkSkip() finally: diff --git a/tests/test_cleanup.py b/tests/test_cleanup.py index 7fdaa5767..7bb19940a 100644 --- a/tests/test_cleanup.py +++ b/tests/test_cleanup.py @@ -1357,30 +1357,14 @@ def test_leafCoalesceTracker(self): def runAbortable(self, func, ret, ns, abortTest, pollInterval, timeOut): return func() - @mock.patch('cleanup.util', autospec=True) - @mock.patch('cleanup.vhdutil', autospec=True) - @mock.patch('cleanup.journaler.Journaler', autospec=True) - @mock.patch('cleanup.Util.runAbortable') - def test_coalesce_success( - self, mock_abortable, mock_journaler, mock_vhdutil, mock_util): - """ - Non-leaf coalesce - """ - mock_abortable.side_effect = self.runAbortable - - sr_uuid = uuid4() - sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid)) - sr.journaler = mock_journaler - - mock_ipc_flag = mock.MagicMock(spec=ipc.IPCFlag) - print('IPC = %s' % (mock_ipc_flag)) - self.mock_IPCFlag.return_value = mock_ipc_flag - mock_ipc_flag.test.return_value = None + def add_vdis_for_coalesce(self, sr): + vdis = {} parent_uuid = str(uuid4()) parent = cleanup.VDI(sr, parent_uuid, False) parent.path = 'dummy-path' sr.vdis[parent_uuid] = parent + vdis['parent'] = parent vdi_uuid = str(uuid4()) vdi = cleanup.VDI(sr, vdi_uuid, False) @@ -1389,16 +1373,45 @@ def test_coalesce_success( parent.children.append(vdi) sr.vdis[vdi_uuid] = vdi + vdis['vdi'] = vdi child_vdi_uuid = str(uuid4()) child_vdi = cleanup.VDI(sr, child_vdi_uuid, False) child_vdi.path = 'dummy-child' vdi.children.append(child_vdi) sr.vdis[child_vdi_uuid] = child_vdi + vdis['child'] = child_vdi + + return vdis + + @mock.patch('cleanup.util', autospec=True) + @mock.patch('cleanup.vhdutil', autospec=True) + @mock.patch('cleanup.journaler.Journaler', autospec=True) + @mock.patch('cleanup.Util.runAbortable') + def test_coalesce_success( + self, mock_abortable, mock_journaler, mock_vhdutil, mock_util): + """ + Non-leaf coalesce + """ + self.xapi_mock.getConfigVDI.return_value = {} + + mock_abortable.side_effect = self.runAbortable + + sr_uuid = uuid4() + sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid)) + sr.journaler = mock_journaler + + mock_ipc_flag = mock.MagicMock(spec=ipc.IPCFlag) + print('IPC = %s' % (mock_ipc_flag)) + self.mock_IPCFlag.return_value = mock_ipc_flag + mock_ipc_flag.test.return_value = None + vdis = self.add_vdis_for_coalesce(sr) mock_journaler.get.return_value = None - res = sr.coalesce(vdi, False) + vdi_uuid = vdis['vdi'].uuid + + res = sr.coalesce(vdis['vdi'], False) mock_journaler.create.assert_has_calls( [mock.call('coalesce', vdi_uuid, '1'), @@ -1406,3 +1419,89 @@ def test_coalesce_success( mock_journaler.remove.assert_has_calls( [mock.call('coalesce', vdi_uuid), mock.call('relink', vdi_uuid)]) + + self.xapi_mock.getConfigVDI.assert_has_calls( + [mock.call(vdis['child'], 'activating')]) + + self.xapi_mock.addToConfigVDI.assert_has_calls( + [mock.call(vdis['child'], 'relinking', 'True')]) + + # Remove twice as set does a remove first and then for completion + self.xapi_mock.removeFromConfigVDI.assert_has_calls( + [mock.call(vdis['child'], 'relinking'), + mock.call(vdis['child'], 'vhd-parent'), + mock.call(vdis['child'], 'relinking')]) + + def test_tag_children_for_relink_activation(self): + """ + Cleanup: tag for relink, activation races + """ + self.xapi_mock.getConfigVDI.side_effect = [ + {'activating': 'True'}, + {}, + {} + ] + + sr_uuid = uuid4() + sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid)) + + vdis = self.add_vdis_for_coalesce(sr) + + vdis['parent']._tagChildrenForRelink() + + self.xapi_mock.getConfigVDI.assert_has_calls( + [mock.call(vdis['child'], 'activating'), + mock.call(vdis['child'], 'activating'), + mock.call(vdis['child'], 'activating')]) + self.xapi_mock.addToConfigVDI.assert_has_calls( + [mock.call(vdis['child'], 'relinking', 'True')]) + self.assertEqual(1, self.xapi_mock.removeFromConfigVDI.call_count) + + def test_tag_children_for_relink_activation_second_phase(self): + """ + Cleanup: tag for relink, set and then activation + """ + self.xapi_mock.getConfigVDI.side_effect = [ + {}, + {'activating': 'True'}, + {}, + {} + ] + + sr_uuid = uuid4() + sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid)) + + vdis = self.add_vdis_for_coalesce(sr) + + vdis['parent']._tagChildrenForRelink() + + self.xapi_mock.getConfigVDI.assert_has_calls( + [mock.call(vdis['child'], 'activating'), + mock.call(vdis['child'], 'activating'), + mock.call(vdis['child'], 'activating'), + mock.call(vdis['child'], 'activating')]) + self.xapi_mock.addToConfigVDI.assert_has_calls( + [mock.call(vdis['child'], 'relinking', 'True'), + mock.call(vdis['child'], 'relinking', 'True')]) + # Remove called 3 times, twice from set, once on failure + self.assertEqual(3, self.xapi_mock.removeFromConfigVDI.call_count) + + @mock.patch('cleanup.time.sleep', autospec=True) + def test_tag_children_for_relink_blocked(self, mock_sleep): + """ + Cleanup: tag for relink, blocked - exception + """ + + self.xapi_mock.getConfigVDI.return_value = {'activating': 'True'} + + sr_uuid = uuid4() + sr = create_cleanup_sr(self.xapi_mock, uuid=str(sr_uuid)) + + vdis = self.add_vdis_for_coalesce(sr) + + with self.assertRaises(util.SMException) as sme: + vdis['parent']._tagChildrenForRelink() + + self.assertIn('Failed to tag vdi', sme.exception.message) + + self.assertGreater(mock_sleep.call_count, 5) From 7afd89d8a86b5d3ecce91d8005ff8dadfcefedd4 Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Tue, 22 Sep 2020 15:56:39 +0100 Subject: [PATCH 2/2] CA-344254: check and write tags during blktap2 activation Signed-off-by: Mark Syms --- drivers/blktap2.py | 16 +++++++- tests/test_blktap2.py | 88 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 100 insertions(+), 4 deletions(-) diff --git a/drivers/blktap2.py b/drivers/blktap2.py index 9fdcd9fc5..ce1bd3f3b 100755 --- a/drivers/blktap2.py +++ b/drivers/blktap2.py @@ -1435,17 +1435,25 @@ def _add_tag(self, vdi_uuid, writable): term_output=False, writable=writable): raise util.SMException("VDI %s not detached cleanly" % vdi_uuid) sm_config = self._session.xenapi.VDI.get_sm_config(vdi_ref) + if 'relinking' in sm_config: + util.SMlog("Relinking key found, back-off and retry" % sm_config) + return False if sm_config.has_key('paused'): util.SMlog("Paused or host_ref key found [%s]" % sm_config) return False + self._session.xenapi.VDI.add_to_sm_config( + vdi_ref, 'activating', 'True') host_key = "host_%s" % host_ref assert not sm_config.has_key(host_key) self._session.xenapi.VDI.add_to_sm_config(vdi_ref, host_key, attach_mode) sm_config = self._session.xenapi.VDI.get_sm_config(vdi_ref) - if sm_config.has_key('paused'): - util.SMlog("Found paused key, aborting") + if sm_config.has_key('paused') or sm_config.has_key('relinking'): + util.SMlog("Found %s key, aborting" % ( + 'paused' if sm_config.has_key('paused') else 'relinking')) self._session.xenapi.VDI.remove_from_sm_config(vdi_ref, host_key) + self._session.xenapi.VDI.remove_from_sm_config( + vdi_ref, 'activating') return False util.SMlog("Activate lock succeeded") return True @@ -1649,6 +1657,10 @@ def _activate_locked(self, sr_uuid, vdi_uuid, options): util.SMlog('failed to remove tag: %s' % e) break raise + finally: + vdi_ref = self._session.xenapi.VDI.get_by_uuid(vdi_uuid) + self._session.xenapi.VDI.remove_from_sm_config( + vdi_ref, 'activating') # Link result to backend/ self.BackendLink.from_uuid(sr_uuid, vdi_uuid).mklink(dev_path) diff --git a/tests/test_blktap2.py b/tests/test_blktap2.py index 84ed05b0c..847585784 100644 --- a/tests/test_blktap2.py +++ b/tests/test_blktap2.py @@ -158,9 +158,47 @@ def test_activate(self, mock_tapdisk, mock_nbd_link, mock_backend, mock_this_host.return_value = str(uuid.uuid4()) self.mock_session.xenapi.VDI.get_sm_config.return_value = {} + self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1' + self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1' self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {}) + self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls( + [mock.call('vref1', 'activating', 'True'), + mock.call('vref1', 'host_href1', "RW")], + any_order=True) + self.mock_session.xenapi.VDI.remove_from_sm_config.assert_has_calls( + [mock.call('vref1', 'activating')], + any_order=True) + + @mock.patch('blktap2.time.sleep', autospec=True) + @mock.patch('blktap2.util.get_this_host', autospec=True) + @mock.patch('blktap2.VDI._attach', autospec=True) + @mock.patch('blktap2.VDI.PhyLink', autospec=True) + @mock.patch('blktap2.VDI.BackendLink', autospec=True) + @mock.patch('blktap2.VDI.NBDLink', autospec=True) + @mock.patch('blktap2.Tapdisk') + def test_activate_relink_retry( + self, mock_tapdisk, mock_nbd_link, mock_backend, + mock_phy, mock_attach, + mock_this_host, mock_sleep): + """ + Test blktap2.VDI.activate, relinking, retry 1, success + """ + mock_this_host.return_value = str(uuid.uuid4()) + + self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1' + self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1' + self.mock_session.xenapi.VDI.get_sm_config.side_effect = [ + {'relinking': 'true'}, {}, {}] + + self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {}) + + self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls( + [mock.call('vref1', 'activating', 'True'), + mock.call('vref1', 'host_href1', "RW")], + any_order=True) + @mock.patch('blktap2.time.sleep', autospec=True) @mock.patch('blktap2.util.get_this_host', autospec=True) @mock.patch('blktap2.VDI._attach', autospec=True) @@ -177,10 +215,16 @@ def test_activate_pause_retry( """ mock_this_host.return_value = str(uuid.uuid4()) + self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1' + self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1' self.mock_session.xenapi.VDI.get_sm_config.side_effect = [ {'paused': 'true'}, {}, {}] self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {}) + self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls( + [mock.call('vref1', 'activating', 'True'), + mock.call('vref1', 'host_href1', "RW")], + any_order=True) @mock.patch('blktap2.time.sleep', autospec=True) @mock.patch('blktap2.util.get_this_host', autospec=True) @@ -206,8 +250,48 @@ def test_activate_paused_while_tagging( self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {}) - self.mock_session.xenapi.VDI.remove_from_sm_config.assert_called_with( - 'vref1', 'host_href1') + self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls( + [mock.call('vref1', 'activating', 'True'), + mock.call('vref1', 'host_href1', "RW")], + any_order=True) + self.mock_session.xenapi.VDI.remove_from_sm_config.assert_has_calls( + [mock.call('vref1', 'host_href1'), + mock.call('vref1', 'activating')], + any_order=True) + + @mock.patch('blktap2.time.sleep', autospec=True) + @mock.patch('blktap2.util.get_this_host', autospec=True) + @mock.patch('blktap2.VDI._attach', autospec=True) + @mock.patch('blktap2.VDI.PhyLink', autospec=True) + @mock.patch('blktap2.VDI.BackendLink', autospec=True) + @mock.patch('blktap2.VDI.NBDLink', autospec=True) + @mock.patch('blktap2.Tapdisk') + def test_activate_relink_while_tagging( + self, mock_tapdisk, mock_nbd_link, mock_backend, + mock_phy, mock_attach, + mock_this_host, mock_sleep): + """ + Test blktap2.VDI.activate, relinking, while tagging, retry 1, success + """ + host_uuid = str(uuid.uuid4()) + mock_this_host.return_value = host_uuid + + self.mock_session.xenapi.host.get_by_uuid.return_value = 'href1' + self.mock_session.xenapi.VDI.get_by_uuid.return_value = 'vref1' + self.mock_session.xenapi.VDI.get_sm_config.side_effect = [ + {}, {'relinking': 'true'}, {}, {}] + + self.vdi.activate(self.sr_uuid, self.vdi_uuid, True, {}) + + self.mock_session.xenapi.VDI.add_to_sm_config.assert_has_calls( + [mock.call('vref1', 'activating', 'True'), + mock.call('vref1', 'host_href1', "RW")], + any_order=True) + self.mock_session.xenapi.VDI.remove_from_sm_config.assert_has_calls( + [mock.call('vref1', 'host_href1'), + mock.call('vref1', 'activating')], + any_order=True) + class TestTapCtl(unittest.TestCase):