Skip to content

Commit 1eb5bfa

Browse files
grwilsonbehlendorf
authored andcommitted
Illumos #3145, #3212
3145 single-copy arc 3212 ztest: race condition between vdev_online() and spa_vdev_remove() Reviewed by: Matt Ahrens <matthew.ahrens@delphix.com> Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Eric Schrock <eric.schrock@delphix.com> Reviewed by: Justin T. Gibbs <gibbs@scsiguy.com> Approved by: Eric Schrock <eric.schrock@delphix.com> References: illumos-gate/commit/9253d63df408bb48584e0b1abfcc24ef2472382e illumos changeset: 13840:97fd5cdf328a https://www.illumos.org/issues/3145 https://www.illumos.org/issues/3212 Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #989 Closes #1137
1 parent 753c383 commit 1eb5bfa

File tree

4 files changed

+113
-2
lines changed

4 files changed

+113
-2
lines changed

cmd/ztest/ztest.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4965,7 +4965,18 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
49654965
if (islog)
49664966
(void) rw_exit(&ztest_name_lock);
49674967
} else {
4968+
/*
4969+
* Ideally we would like to be able to randomly
4970+
* call vdev_[on|off]line without holding locks
4971+
* to force unpredictable failures but the side
4972+
* effects of vdev_[on|off]line prevent us from
4973+
* doing so. We grab the ztest_vdev_lock here to
4974+
* prevent a race between injection testing and
4975+
* aux_vdev removal.
4976+
*/
4977+
mutex_enter(&ztest_vdev_lock);
49684978
(void) vdev_online(spa, guid0, 0, NULL);
4979+
mutex_exit(&ztest_vdev_lock);
49694980
}
49704981
}
49714982

include/sys/arc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ int arc_released(arc_buf_t *buf);
109109
int arc_has_callback(arc_buf_t *buf);
110110
void arc_buf_freeze(arc_buf_t *buf);
111111
void arc_buf_thaw(arc_buf_t *buf);
112+
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
112113
#ifdef ZFS_DEBUG
113114
int arc_referenced(arc_buf_t *buf);
114115
#endif

module/zfs/arc.c

Lines changed: 83 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ unsigned long zfs_arc_meta_limit = 0;
189189
int zfs_arc_grow_retry = 0;
190190
int zfs_arc_shrink_shift = 0;
191191
int zfs_arc_p_min_shift = 0;
192+
int zfs_disable_dup_eviction = 0;
192193
int zfs_arc_meta_prune = 0;
193194

194195
/*
@@ -307,6 +308,9 @@ typedef struct arc_stats {
307308
kstat_named_t arcstat_l2_size;
308309
kstat_named_t arcstat_l2_hdr_size;
309310
kstat_named_t arcstat_memory_throttle_count;
311+
kstat_named_t arcstat_duplicate_buffers;
312+
kstat_named_t arcstat_duplicate_buffers_size;
313+
kstat_named_t arcstat_duplicate_reads;
310314
kstat_named_t arcstat_memory_direct_count;
311315
kstat_named_t arcstat_memory_indirect_count;
312316
kstat_named_t arcstat_no_grow;
@@ -387,6 +391,9 @@ static arc_stats_t arc_stats = {
387391
{ "l2_size", KSTAT_DATA_UINT64 },
388392
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
389393
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
394+
{ "duplicate_buffers", KSTAT_DATA_UINT64 },
395+
{ "duplicate_buffers_size", KSTAT_DATA_UINT64 },
396+
{ "duplicate_reads", KSTAT_DATA_UINT64 },
390397
{ "memory_direct_count", KSTAT_DATA_UINT64 },
391398
{ "memory_indirect_count", KSTAT_DATA_UINT64 },
392399
{ "arc_no_grow", KSTAT_DATA_UINT64 },
@@ -1369,6 +1376,17 @@ arc_buf_clone(arc_buf_t *from)
13691376
hdr->b_buf = buf;
13701377
arc_get_data_buf(buf);
13711378
bcopy(from->b_data, buf->b_data, size);
1379+
1380+
/*
1381+
* This buffer already exists in the arc so create a duplicate
1382+
* copy for the caller. If the buffer is associated with user data
1383+
* then track the size and number of duplicates. These stats will be
1384+
* updated as duplicate buffers are created and destroyed.
1385+
*/
1386+
if (hdr->b_type == ARC_BUFC_DATA) {
1387+
ARCSTAT_BUMP(arcstat_duplicate_buffers);
1388+
ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
1389+
}
13721390
hdr->b_datacnt += 1;
13731391
return (buf);
13741392
}
@@ -1467,6 +1485,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
14671485
ASSERT3U(state->arcs_size, >=, size);
14681486
atomic_add_64(&state->arcs_size, -size);
14691487
buf->b_data = NULL;
1488+
1489+
/*
1490+
* If we're destroying a duplicate buffer make sure
1491+
* that the appropriate statistics are updated.
1492+
*/
1493+
if (buf->b_hdr->b_datacnt > 1 &&
1494+
buf->b_hdr->b_type == ARC_BUFC_DATA) {
1495+
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
1496+
ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
1497+
}
14701498
ASSERT(buf->b_hdr->b_datacnt > 0);
14711499
buf->b_hdr->b_datacnt -= 1;
14721500
}
@@ -1651,6 +1679,48 @@ arc_buf_size(arc_buf_t *buf)
16511679
return (buf->b_hdr->b_size);
16521680
}
16531681

1682+
/*
1683+
* Called from the DMU to determine if the current buffer should be
1684+
* evicted. In order to ensure proper locking, the eviction must be initiated
1685+
* from the DMU. Return true if the buffer is associated with user data and
1686+
* duplicate buffers still exist.
1687+
*/
1688+
boolean_t
1689+
arc_buf_eviction_needed(arc_buf_t *buf)
1690+
{
1691+
arc_buf_hdr_t *hdr;
1692+
boolean_t evict_needed = B_FALSE;
1693+
1694+
if (zfs_disable_dup_eviction)
1695+
return (B_FALSE);
1696+
1697+
mutex_enter(&buf->b_evict_lock);
1698+
hdr = buf->b_hdr;
1699+
if (hdr == NULL) {
1700+
/*
1701+
* We are in arc_do_user_evicts(); let that function
1702+
* perform the eviction.
1703+
*/
1704+
ASSERT(buf->b_data == NULL);
1705+
mutex_exit(&buf->b_evict_lock);
1706+
return (B_FALSE);
1707+
} else if (buf->b_data == NULL) {
1708+
/*
1709+
* We have already been added to the arc eviction list;
1710+
* recommend eviction.
1711+
*/
1712+
ASSERT3P(hdr, ==, &arc_eviction_hdr);
1713+
mutex_exit(&buf->b_evict_lock);
1714+
return (B_TRUE);
1715+
}
1716+
1717+
if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
1718+
evict_needed = B_TRUE;
1719+
1720+
mutex_exit(&buf->b_evict_lock);
1721+
return (evict_needed);
1722+
}
1723+
16541724
/*
16551725
* Evict buffers from list until we've removed the specified number of
16561726
* bytes. Move the removed buffers to the appropriate evict state.
@@ -2753,8 +2823,10 @@ arc_read_done(zio_t *zio)
27532823
abuf = buf;
27542824
for (acb = callback_list; acb; acb = acb->acb_next) {
27552825
if (acb->acb_done) {
2756-
if (abuf == NULL)
2826+
if (abuf == NULL) {
2827+
ARCSTAT_BUMP(arcstat_duplicate_reads);
27572828
abuf = arc_buf_clone(buf);
2829+
}
27582830
acb->acb_buf = abuf;
27592831
abuf = NULL;
27602832
}
@@ -3324,6 +3396,16 @@ arc_release(arc_buf_t *buf, void *tag)
33243396
ASSERT3U(*size, >=, hdr->b_size);
33253397
atomic_add_64(size, -hdr->b_size);
33263398
}
3399+
3400+
/*
3401+
* We're releasing a duplicate user data buffer, update
3402+
* our statistics accordingly.
3403+
*/
3404+
if (hdr->b_type == ARC_BUFC_DATA) {
3405+
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
3406+
ARCSTAT_INCR(arcstat_duplicate_buffers_size,
3407+
-hdr->b_size);
3408+
}
33273409
hdr->b_datacnt -= 1;
33283410
arc_cksum_verify(buf);
33293411

module/zfs/dbuf.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2189,7 +2189,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
21892189
dbuf_evict(db);
21902190
} else {
21912191
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
2192-
if (!DBUF_IS_CACHEABLE(db))
2192+
2193+
/*
2194+
* A dbuf will be eligible for eviction if either the
2195+
* 'primarycache' property is set or a duplicate
2196+
* copy of this buffer is already cached in the arc.
2197+
*
2198+
* In the case of the 'primarycache' a buffer
2199+
* is considered for eviction if it matches the
2200+
* criteria set in the property.
2201+
*
2202+
* To decide if our buffer is considered a
2203+
* duplicate, we must call into the arc to determine
2204+
* if multiple buffers are referencing the same
2205+
* block on-disk. If so, then we simply evict
2206+
* ourselves.
2207+
*/
2208+
if (!DBUF_IS_CACHEABLE(db) ||
2209+
arc_buf_eviction_needed(db->db_buf))
21932210
dbuf_clear(db);
21942211
else
21952212
mutex_exit(&db->db_mtx);

0 commit comments

Comments
 (0)