Skip to content

Commit bd089c5

Browse files
ahrensbehlendorf
authored andcommitted
Illumos 4631 - zvol_get_stats triggering too many reads
4631 zvol_get_stats triggering too many reads Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Sebastien Roy <sebastien.roy@delphix.com> Reviewed by: Matt Ahrens <mahrens@delphix.com> Approved by: Dan McDonald <danmcd@omniti.com> References: https://www.illumos.org/issues/4631 illumos/illumos-gate@bbfa8ea Ported-by: Boris Protopopov <bprotopopov@hotmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2612 Closes #2480
1 parent 2fe5011 commit bd089c5

File tree

3 files changed

+64
-79
lines changed

3 files changed

+64
-79
lines changed

include/sys/arc.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
136136
int arc_buf_size(arc_buf_t *buf);
137137
void arc_release(arc_buf_t *buf, void *tag);
138138
int arc_released(arc_buf_t *buf);
139-
int arc_has_callback(arc_buf_t *buf);
140139
void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
141140
void arc_buf_freeze(arc_buf_t *buf);
142141
void arc_buf_thaw(arc_buf_t *buf);
@@ -159,7 +158,7 @@ void arc_remove_prune_callback(arc_prune_t *p);
159158
void arc_freed(spa_t *spa, const blkptr_t *bp);
160159

161160
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
162-
int arc_buf_evict(arc_buf_t *buf);
161+
boolean_t arc_clear_callback(arc_buf_t *buf);
163162

164163
void arc_flush(spa_t *spa);
165164
void arc_tempreserve_clear(uint64_t reserve);

module/zfs/arc.c

Lines changed: 38 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@
104104
* with the buffer may be evicted prior to the callback. The callback
105105
* must be made with *no locks held* (to prevent deadlock). Additionally,
106106
* the users of callbacks must ensure that their private data is
107-
* protected from simultaneous callbacks from arc_buf_evict()
107+
* protected from simultaneous callbacks from arc_clear_callback()
108108
* and arc_do_user_evicts().
109109
*
110110
* It as also possible to register a callback which is run when the
@@ -1604,8 +1604,12 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
16041604
}
16051605
}
16061606

1607+
/*
1608+
* Free up buf->b_data and if 'remove' is set, then pull the
1609+
* arc_buf_t off of the the arc_buf_hdr_t's list and free it.
1610+
*/
16071611
static void
1608-
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
1612+
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
16091613
{
16101614
arc_buf_t **bufp;
16111615

@@ -1655,7 +1659,7 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
16551659
}
16561660

16571661
/* only remove the buf if requested */
1658-
if (!all)
1662+
if (!remove)
16591663
return;
16601664

16611665
/* remove the buf from the hdr list */
@@ -2265,7 +2269,7 @@ arc_do_user_evicts(void)
22652269
mutex_exit(&arc_eviction_mtx);
22662270

22672271
if (buf->b_efunc != NULL)
2268-
VERIFY(buf->b_efunc(buf) == 0);
2272+
VERIFY0(buf->b_efunc(buf->b_private));
22692273

22702274
buf->b_efunc = NULL;
22712275
buf->b_private = NULL;
@@ -3576,16 +3580,25 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
35763580
}
35773581

35783582
/*
3579-
* This is used by the DMU to let the ARC know that a buffer is
3580-
* being evicted, so the ARC should clean up. If this arc buf
3581-
* is not yet in the evicted state, it will be put there.
3583+
* Clear the user eviction callback set by arc_set_callback(), first calling
3584+
* it if it exists. Because the presence of a callback keeps an arc_buf cached
3585+
* clearing the callback may result in the arc_buf being destroyed. However,
3586+
* it will not result in the *last* arc_buf being destroyed, hence the data
3587+
* will remain cached in the ARC. We make a copy of the arc buffer here so
3588+
* that we can process the callback without holding any locks.
3589+
*
3590+
* It's possible that the callback is already in the process of being cleared
3591+
* by another thread. In this case we can not clear the callback.
3592+
*
3593+
* Returns B_TRUE if the callback was successfully called and cleared.
35823594
*/
3583-
int
3584-
arc_buf_evict(arc_buf_t *buf)
3595+
boolean_t
3596+
arc_clear_callback(arc_buf_t *buf)
35853597
{
35863598
arc_buf_hdr_t *hdr;
35873599
kmutex_t *hash_lock;
3588-
arc_buf_t **bufp;
3600+
arc_evict_func_t *efunc = buf->b_efunc;
3601+
void *private = buf->b_private;
35893602

35903603
mutex_enter(&buf->b_evict_lock);
35913604
hdr = buf->b_hdr;
@@ -3595,17 +3608,16 @@ arc_buf_evict(arc_buf_t *buf)
35953608
*/
35963609
ASSERT(buf->b_data == NULL);
35973610
mutex_exit(&buf->b_evict_lock);
3598-
return (0);
3611+
return (B_FALSE);
35993612
} else if (buf->b_data == NULL) {
3600-
arc_buf_t copy = *buf; /* structure assignment */
36013613
/*
36023614
* We are on the eviction list; process this buffer now
36033615
* but let arc_do_user_evicts() do the reaping.
36043616
*/
36053617
buf->b_efunc = NULL;
36063618
mutex_exit(&buf->b_evict_lock);
3607-
VERIFY(copy.b_efunc(&copy) == 0);
3608-
return (1);
3619+
VERIFY0(efunc(private));
3620+
return (B_TRUE);
36093621
}
36103622
hash_lock = HDR_LOCK(hdr);
36113623
mutex_enter(hash_lock);
@@ -3615,48 +3627,21 @@ arc_buf_evict(arc_buf_t *buf)
36153627
ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
36163628
ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
36173629

3618-
/*
3619-
* Pull this buffer off of the hdr
3620-
*/
3621-
bufp = &hdr->b_buf;
3622-
while (*bufp != buf)
3623-
bufp = &(*bufp)->b_next;
3624-
*bufp = buf->b_next;
3625-
3626-
ASSERT(buf->b_data != NULL);
3627-
arc_buf_destroy(buf, FALSE, FALSE);
3628-
3629-
if (hdr->b_datacnt == 0) {
3630-
arc_state_t *old_state = hdr->b_state;
3631-
arc_state_t *evicted_state;
3632-
3633-
ASSERT(hdr->b_buf == NULL);
3634-
ASSERT(refcount_is_zero(&hdr->b_refcnt));
3635-
3636-
evicted_state =
3637-
(old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
3638-
3639-
mutex_enter(&old_state->arcs_mtx);
3640-
mutex_enter(&evicted_state->arcs_mtx);
3641-
3642-
arc_change_state(evicted_state, hdr, hash_lock);
3643-
ASSERT(HDR_IN_HASH_TABLE(hdr));
3644-
hdr->b_flags |= ARC_IN_HASH_TABLE;
3645-
hdr->b_flags &= ~ARC_BUF_AVAILABLE;
3630+
buf->b_efunc = NULL;
3631+
buf->b_private = NULL;
36463632

3647-
mutex_exit(&evicted_state->arcs_mtx);
3648-
mutex_exit(&old_state->arcs_mtx);
3633+
if (hdr->b_datacnt > 1) {
3634+
mutex_exit(&buf->b_evict_lock);
3635+
arc_buf_destroy(buf, FALSE, TRUE);
3636+
} else {
3637+
ASSERT(buf == hdr->b_buf);
3638+
hdr->b_flags |= ARC_BUF_AVAILABLE;
3639+
mutex_exit(&buf->b_evict_lock);
36493640
}
3650-
mutex_exit(hash_lock);
3651-
mutex_exit(&buf->b_evict_lock);
36523641

3653-
VERIFY(buf->b_efunc(buf) == 0);
3654-
buf->b_efunc = NULL;
3655-
buf->b_private = NULL;
3656-
buf->b_hdr = NULL;
3657-
buf->b_next = NULL;
3658-
kmem_cache_free(buf_cache, buf);
3659-
return (1);
3642+
mutex_exit(hash_lock);
3643+
VERIFY0(efunc(private));
3644+
return (B_TRUE);
36603645
}
36613646

36623647
/*
@@ -3813,17 +3798,6 @@ arc_released(arc_buf_t *buf)
38133798
return (released);
38143799
}
38153800

3816-
int
3817-
arc_has_callback(arc_buf_t *buf)
3818-
{
3819-
int callback;
3820-
3821-
mutex_enter(&buf->b_evict_lock);
3822-
callback = (buf->b_efunc != NULL);
3823-
mutex_exit(&buf->b_evict_lock);
3824-
return (callback);
3825-
}
3826-
38273801
#ifdef ZFS_DEBUG
38283802
int
38293803
arc_referenced(arc_buf_t *buf)

module/zfs/dbuf.c

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
211211
}
212212

213213
/*
214-
* Remove an entry from the hash table. This operation will
215-
* fail if there are any existing holds on the db.
214+
* Remove an entry from the hash table. It must be in the EVICTING state.
216215
*/
217216
static void
218217
dbuf_hash_remove(dmu_buf_impl_t *db)
@@ -226,7 +225,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
226225
idx = hv & h->hash_table_mask;
227226

228227
/*
229-
* We musn't hold db_mtx to maintin lock ordering:
228+
* We musn't hold db_mtx to maintain lock ordering:
230229
* DBUF_HASH_MUTEX > db_mtx.
231230
*/
232231
ASSERT(refcount_is_zero(&db->db_holds));
@@ -487,7 +486,6 @@ static void
487486
dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
488487
{
489488
ASSERT(MUTEX_HELD(&db->db_mtx));
490-
ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
491489
db->db_buf = buf;
492490
if (buf != NULL) {
493491
ASSERT(buf->b_data != NULL);
@@ -1595,20 +1593,23 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
15951593
* when we are not holding the dn_dbufs_mtx, we can't clear the
15961594
* entry in the dn_dbufs list. We have to wait until dbuf_destroy()
15971595
* in this case. For callers from the DMU we will usually see:
1598-
* dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy()
1596+
* dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy()
15991597
* For the arc callback, we will usually see:
16001598
* dbuf_do_evict()->dbuf_clear();dbuf_destroy()
16011599
* Sometimes, though, we will get a mix of these two:
1602-
* DMU: dbuf_clear()->arc_buf_evict()
1600+
* DMU: dbuf_clear()->arc_clear_callback()
16031601
* ARC: dbuf_do_evict()->dbuf_destroy()
1602+
*
1603+
* This routine will dissociate the dbuf from the arc, by calling
1604+
* arc_clear_callback(), but will not evict the data from the ARC.
16041605
*/
16051606
void
16061607
dbuf_clear(dmu_buf_impl_t *db)
16071608
{
16081609
dnode_t *dn;
16091610
dmu_buf_impl_t *parent = db->db_parent;
16101611
dmu_buf_impl_t *dndb;
1611-
int dbuf_gone = FALSE;
1612+
boolean_t dbuf_gone = B_FALSE;
16121613

16131614
ASSERT(MUTEX_HELD(&db->db_mtx));
16141615
ASSERT(refcount_is_zero(&db->db_holds));
@@ -1654,7 +1655,7 @@ dbuf_clear(dmu_buf_impl_t *db)
16541655
}
16551656

16561657
if (db->db_buf)
1657-
dbuf_gone = arc_buf_evict(db->db_buf);
1658+
dbuf_gone = arc_clear_callback(db->db_buf);
16581659

16591660
if (!dbuf_gone)
16601661
mutex_exit(&db->db_mtx);
@@ -1831,8 +1832,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
18311832
static int
18321833
dbuf_do_evict(void *private)
18331834
{
1834-
arc_buf_t *buf = private;
1835-
dmu_buf_impl_t *db = buf->b_private;
1835+
dmu_buf_impl_t *db = private;
18361836

18371837
if (!MUTEX_HELD(&db->db_mtx))
18381838
mutex_enter(&db->db_mtx);
@@ -2239,11 +2239,23 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
22392239
* block on-disk. If so, then we simply evict
22402240
* ourselves.
22412241
*/
2242-
if (!DBUF_IS_CACHEABLE(db) ||
2243-
arc_buf_eviction_needed(db->db_buf))
2242+
if (!DBUF_IS_CACHEABLE(db)) {
2243+
if (db->db_blkptr != NULL &&
2244+
!BP_IS_HOLE(db->db_blkptr) &&
2245+
!BP_IS_EMBEDDED(db->db_blkptr)) {
2246+
spa_t *spa =
2247+
dmu_objset_spa(db->db_objset);
2248+
blkptr_t bp = *db->db_blkptr;
2249+
dbuf_clear(db);
2250+
arc_freed(spa, &bp);
2251+
} else {
2252+
dbuf_clear(db);
2253+
}
2254+
} else if (arc_buf_eviction_needed(db->db_buf)) {
22442255
dbuf_clear(db);
2245-
else
2256+
} else {
22462257
mutex_exit(&db->db_mtx);
2258+
}
22472259
}
22482260
} else {
22492261
mutex_exit(&db->db_mtx);

0 commit comments

Comments
 (0)