Skip to content

Commit 080b310

Browse files
grwilson authored and behlendorf committed
Illumos #4756 Fix metaslab_group_preload deadlock
4756 metaslab_group_preload() could deadlock

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Approved by: Garrett D'Amore <garrett@damore.org>

The metaslab_group_preload() function grabs the mg_lock and then later tries to grab the metaslab lock. This lock ordering may lead to a deadlock since other consumers of the mg_lock will grab the metaslab lock first.

References:
  https://www.illumos.org/issues/4756
  illumos/illumos-gate@30beaff

Ported-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2488
1 parent 3c51c5c commit 080b310

File tree

2 files changed

+25
-4
lines changed

2 files changed

+25
-4
lines changed

cmd/ztest/ztest.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2013 by Delphix. All rights reserved.
23+
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
2424
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
2525
* Copyright (c) 2013 Steven Hartland. All rights reserved.
2626
*/
@@ -187,6 +187,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
187187

188188
extern uint64_t metaslab_gang_bang;
189189
extern uint64_t metaslab_df_alloc_threshold;
190+
extern int metaslab_preload_limit;
190191

191192
static ztest_shared_opts_t *ztest_shared_opts;
192193
static ztest_shared_opts_t ztest_opts;
@@ -5734,6 +5735,7 @@ ztest_run(ztest_shared_t *zs)
57345735
kernel_init(FREAD | FWRITE);
57355736
VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
57365737
spa->spa_debug = B_TRUE;
5738+
metaslab_preload_limit = ztest_random(20) + 1;
57375739
ztest_spa = spa;
57385740

57395741
VERIFY0(dmu_objset_own(ztest_opts.zo_pool,

module/zfs/metaslab.c

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1242,6 +1242,8 @@ metaslab_preload(void *arg)
12421242
metaslab_t *msp = arg;
12431243
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
12441244

1245+
ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
1246+
12451247
mutex_enter(&msp->ms_lock);
12461248
metaslab_load_wait(msp);
12471249
if (!msp->ms_loaded)
@@ -1266,19 +1268,36 @@ metaslab_group_preload(metaslab_group_t *mg)
12661268
taskq_wait(mg->mg_taskq);
12671269
return;
12681270
}
1269-
mutex_enter(&mg->mg_lock);
12701271

1272+
mutex_enter(&mg->mg_lock);
12711273
/*
1272-
* Prefetch the next potential metaslabs
1274+
* Load the next potential metaslabs
12731275
*/
1274-
for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) {
1276+
msp = avl_first(t);
1277+
while (msp != NULL) {
1278+
metaslab_t *msp_next = AVL_NEXT(t, msp);
12751279

12761280
/* If we have reached our preload limit then we're done */
12771281
if (++m > metaslab_preload_limit)
12781282
break;
12791283

1284+
/*
1285+
* We must drop the metaslab group lock here to preserve
1286+
* lock ordering with the ms_lock (when grabbing both
1287+
* the mg_lock and the ms_lock, the ms_lock must be taken
1288+
* first). As a result, it is possible that the ordering
1289+
* of the metaslabs within the avl tree may change before
1290+
* we reacquire the lock. The metaslab cannot be removed from
1291+
* the tree while we're in syncing context so it is safe to
1292+
* drop the mg_lock here. If the metaslabs are reordered
1293+
* nothing will break -- we just may end up loading a
1294+
* less than optimal one.
1295+
*/
1296+
mutex_exit(&mg->mg_lock);
12801297
VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
12811298
msp, TQ_PUSHPAGE) != 0);
1299+
mutex_enter(&mg->mg_lock);
1300+
msp = msp_next;
12821301
}
12831302
mutex_exit(&mg->mg_lock);
12841303
}

0 commit comments

Comments
 (0)