Skip to content

Commit 36da08e

Browse files
Prakash Surya, behlendorf
authored and committed
Illumos 5817 - change type of arcs_size from uint64_t to refcount_t
5817 change type of arcs_size from uint64_t to refcount_t Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com> Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Alex Reece <alex@delphix.com> Reviewed by: Richard Elling <richard.elling@richardelling.com> Approved by: Garrett D'Amore <garrett@damore.org> References: https://www.illumos.org/issues/5817 illumos/illumos-gate@2fd872a Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #3533
1 parent 500445c commit 36da08e

File tree

2 files changed

+113
-28
lines changed

2 files changed

+113
-28
lines changed

include/sys/arc_impl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ typedef struct arc_state {
7979
* total amount of data in this state; this includes: evictable,
8080
* non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
8181
*/
82-
uint64_t arcs_size;
82+
refcount_t arcs_size;
8383
/*
8484
* supports the "dbufs" kstat
8585
*/

module/zfs/arc.c

Lines changed: 112 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,9 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
24-
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
23+
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
24+
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
25+
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
2526
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
2627
*/
2728

@@ -132,6 +133,7 @@
132133
#include <sys/zio_compress.h>
133134
#include <sys/zfs_context.h>
134135
#include <sys/arc.h>
136+
#include <sys/refcount.h>
135137
#include <sys/vdev.h>
136138
#include <sys/vdev_impl.h>
137139
#include <sys/dsl_pool.h>
@@ -1553,12 +1555,75 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
15531555
buf_hash_remove(hdr);
15541556

15551557
/* adjust state sizes (ignore arc_l2c_only) */
1556-
if (to_delta && new_state != arc_l2c_only)
1557-
atomic_add_64(&new_state->arcs_size, to_delta);
1558+
1559+
if (to_delta && new_state != arc_l2c_only) {
1560+
ASSERT(HDR_HAS_L1HDR(hdr));
1561+
if (GHOST_STATE(new_state)) {
1562+
ASSERT0(datacnt);
1563+
1564+
/*
1565+
* When moving a header to a ghost state, we first
1566+
* remove all arc buffers. Thus, we'll have a
1567+
* datacnt of zero, and no arc buffer to use for
1568+
* the reference. As a result, we use the arc
1569+
* header pointer for the reference.
1570+
*/
1571+
(void) refcount_add_many(&new_state->arcs_size,
1572+
hdr->b_size, hdr);
1573+
} else {
1574+
arc_buf_t *buf;
1575+
ASSERT3U(datacnt, !=, 0);
1576+
1577+
/*
1578+
* Each individual buffer holds a unique reference,
1579+
* thus we must add each of these references one
1580+
* at a time.
1581+
*/
1582+
for (buf = hdr->b_l1hdr.b_buf; buf != NULL;
1583+
buf = buf->b_next) {
1584+
(void) refcount_add_many(&new_state->arcs_size,
1585+
hdr->b_size, buf);
1586+
}
1587+
}
1588+
}
1589+
15581590
if (from_delta && old_state != arc_l2c_only) {
1559-
ASSERT3U(old_state->arcs_size, >=, from_delta);
1560-
atomic_add_64(&old_state->arcs_size, -from_delta);
1591+
ASSERT(HDR_HAS_L1HDR(hdr));
1592+
if (GHOST_STATE(old_state)) {
1593+
/*
1594+
* When moving a header off of a ghost state,
1595+
* there's the possibility for datacnt to be
1596+
* non-zero. This is because we first add the
1597+
* arc buffer to the header prior to changing
1598+
* the header's state. Since we used the header
1599+
* for the reference when putting the header on
1600+
* the ghost state, we must balance that and use
1601+
* the header when removing off the ghost state
1602+
* (even though datacnt is non-zero).
1603+
*/
1604+
1605+
IMPLY(datacnt == 0, new_state == arc_anon ||
1606+
new_state == arc_l2c_only);
1607+
1608+
(void) refcount_remove_many(&old_state->arcs_size,
1609+
hdr->b_size, hdr);
1610+
} else {
1611+
arc_buf_t *buf;
1612+
ASSERT3U(datacnt, !=, 0);
1613+
1614+
/*
1615+
* Each individual buffer holds a unique reference,
1616+
* thus we must remove each of these references one
1617+
* at a time.
1618+
*/
1619+
for (buf = hdr->b_l1hdr.b_buf; buf != NULL;
1620+
buf = buf->b_next) {
1621+
(void) refcount_remove_many(
1622+
&old_state->arcs_size, hdr->b_size, buf);
1623+
}
1624+
}
15611625
}
1626+
15621627
if (HDR_HAS_L1HDR(hdr))
15631628
hdr->b_l1hdr.b_state = new_state;
15641629

@@ -1925,8 +1990,8 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t remove)
19251990
ASSERT3U(*cnt, >=, size);
19261991
atomic_add_64(cnt, -size);
19271992
}
1928-
ASSERT3U(state->arcs_size, >=, size);
1929-
atomic_add_64(&state->arcs_size, -size);
1993+
1994+
(void) refcount_remove_many(&state->arcs_size, size, buf);
19301995
buf->b_data = NULL;
19311996

19321997
/*
@@ -2804,7 +2869,8 @@ arc_adjust_meta_only(void)
28042869
* evict some from the MRU here, and some from the MFU below.
28052870
*/
28062871
target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
2807-
(int64_t)(arc_anon->arcs_size + arc_mru->arcs_size - arc_p));
2872+
(int64_t)(refcount_count(&arc_anon->arcs_size) +
2873+
refcount_count(&arc_mru->arcs_size) - arc_p));
28082874

28092875
total_evicted += arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
28102876

@@ -2814,7 +2880,7 @@ arc_adjust_meta_only(void)
28142880
* space allotted to the MFU (which is defined as arc_c - arc_p).
28152881
*/
28162882
target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
2817-
(int64_t)(arc_mfu->arcs_size - (arc_c - arc_p)));
2883+
(int64_t)(refcount_count(&arc_mfu->arcs_size) - (arc_c - arc_p)));
28182884

28192885
total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
28202886

@@ -2932,8 +2998,8 @@ arc_adjust(void)
29322998
* arc_p here, and then evict more from the MFU below.
29332999
*/
29343000
target = MIN((int64_t)(arc_size - arc_c),
2935-
(int64_t)(arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used -
2936-
arc_p));
3001+
(int64_t)(refcount_count(&arc_anon->arcs_size) +
3002+
refcount_count(&arc_mru->arcs_size) + arc_meta_used - arc_p));
29373003

29383004
/*
29393005
* If we're below arc_meta_min, always prefer to evict data.
@@ -3017,7 +3083,8 @@ arc_adjust(void)
30173083
* cache. The following logic enforces these limits on the ghost
30183084
* caches, and evicts from them as needed.
30193085
*/
3020-
target = arc_mru->arcs_size + arc_mru_ghost->arcs_size - arc_c;
3086+
target = refcount_count(&arc_mru->arcs_size) +
3087+
refcount_count(&arc_mru_ghost->arcs_size) - arc_c;
30213088

30223089
bytes = arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_DATA);
30233090
total_evicted += bytes;
@@ -3035,7 +3102,8 @@ arc_adjust(void)
30353102
* mru + mfu + mru ghost + mfu ghost <= 2 * arc_c
30363103
* mru ghost + mfu ghost <= arc_c
30373104
*/
3038-
target = arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size - arc_c;
3105+
target = refcount_count(&arc_mru_ghost->arcs_size) +
3106+
refcount_count(&arc_mfu_ghost->arcs_size) - arc_c;
30393107

30403108
bytes = arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_DATA);
30413109
total_evicted += bytes;
@@ -3650,6 +3718,8 @@ static void
36503718
arc_adapt(int bytes, arc_state_t *state)
36513719
{
36523720
int mult;
3721+
int64_t mrug_size = refcount_count(&arc_mru_ghost->arcs_size);
3722+
int64_t mfug_size = refcount_count(&arc_mfu_ghost->arcs_size);
36533723

36543724
if (state == arc_l2c_only)
36553725
return;
@@ -3664,19 +3734,15 @@ arc_adapt(int bytes, arc_state_t *state)
36643734
* target size of the MRU list.
36653735
*/
36663736
if (state == arc_mru_ghost) {
3667-
mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ?
3668-
1 : (arc_mfu_ghost->arcs_size/arc_mru_ghost->arcs_size));
3669-
3737+
mult = (mrug_size >= mfug_size) ? 1 : (mfug_size / mrug_size);
36703738
if (!zfs_arc_p_dampener_disable)
36713739
mult = MIN(mult, 10); /* avoid wild arc_p adjustment */
36723740

36733741
arc_p = MIN(arc_c, arc_p + bytes * mult);
36743742
} else if (state == arc_mfu_ghost) {
36753743
uint64_t delta;
36763744

3677-
mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ?
3678-
1 : (arc_mru_ghost->arcs_size/arc_mfu_ghost->arcs_size));
3679-
3745+
mult = (mfug_size >= mrug_size) ? 1 : (mrug_size / mfug_size);
36803746
if (!zfs_arc_p_dampener_disable)
36813747
mult = MIN(mult, 10);
36823748

@@ -3794,8 +3860,9 @@ arc_get_data_buf(arc_buf_t *buf)
37943860
*/
37953861
if (!GHOST_STATE(buf->b_hdr->b_l1hdr.b_state)) {
37963862
arc_buf_hdr_t *hdr = buf->b_hdr;
3863+
arc_state_t *state = hdr->b_l1hdr.b_state;
37973864

3798-
atomic_add_64(&hdr->b_l1hdr.b_state->arcs_size, size);
3865+
(void) refcount_add_many(&state->arcs_size, size, buf);
37993866

38003867
/*
38013868
* If this is reached via arc_read, the link is
@@ -3816,7 +3883,8 @@ arc_get_data_buf(arc_buf_t *buf)
38163883
* data, and we have outgrown arc_p, update arc_p
38173884
*/
38183885
if (arc_size < arc_c && hdr->b_l1hdr.b_state == arc_anon &&
3819-
arc_anon->arcs_size + arc_mru->arcs_size > arc_p)
3886+
(refcount_count(&arc_anon->arcs_size) +
3887+
refcount_count(&arc_mru->arcs_size) > arc_p))
38203888
arc_p = MIN(arc_c, arc_p + size);
38213889
}
38223890
}
@@ -4728,8 +4796,10 @@ arc_release(arc_buf_t *buf, void *tag)
47284796
buf->b_next = NULL;
47294797

47304798
ASSERT3P(state, !=, arc_l2c_only);
4731-
ASSERT3U(state->arcs_size, >=, hdr->b_size);
4732-
atomic_add_64(&state->arcs_size, -hdr->b_size);
4799+
4800+
(void) refcount_remove_many(
4801+
&state->arcs_size, hdr->b_size, buf);
4802+
47334803
if (refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
47344804
uint64_t *size;
47354805

@@ -4777,7 +4847,7 @@ arc_release(arc_buf_t *buf, void *tag)
47774847
(void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
47784848
buf->b_hdr = nhdr;
47794849
mutex_exit(&buf->b_evict_lock);
4780-
atomic_add_64(&arc_anon->arcs_size, blksz);
4850+
(void) refcount_add_many(&arc_anon->arcs_size, blksz, buf);
47814851
} else {
47824852
mutex_exit(&buf->b_evict_lock);
47834853
ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1);
@@ -5033,7 +5103,8 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
50335103
* network delays from blocking transactions that are ready to be
50345104
* assigned to a txg.
50355105
*/
5036-
anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0);
5106+
anon_size = MAX((int64_t)(refcount_count(&arc_anon->arcs_size) -
5107+
arc_loaned_bytes), 0);
50375108

50385109
/*
50395110
* Writes will, almost always, require additional memory allocations
@@ -5071,7 +5142,7 @@ static void
50715142
arc_kstat_update_state(arc_state_t *state, kstat_named_t *size,
50725143
kstat_named_t *evict_data, kstat_named_t *evict_metadata)
50735144
{
5074-
size->value.ui64 = state->arcs_size;
5145+
size->value.ui64 = refcount_count(&state->arcs_size);
50755146
evict_data->value.ui64 = state->arcs_lsize[ARC_BUFC_DATA];
50765147
evict_metadata->value.ui64 = state->arcs_lsize[ARC_BUFC_METADATA];
50775148
}
@@ -5323,6 +5394,13 @@ arc_init(void)
53235394
arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
53245395
arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
53255396

5397+
refcount_create(&arc_anon->arcs_size);
5398+
refcount_create(&arc_mru->arcs_size);
5399+
refcount_create(&arc_mru_ghost->arcs_size);
5400+
refcount_create(&arc_mfu->arcs_size);
5401+
refcount_create(&arc_mfu_ghost->arcs_size);
5402+
refcount_create(&arc_l2c_only->arcs_size);
5403+
53265404
buf_init();
53275405

53285406
arc_reclaim_thread_exit = FALSE;
@@ -5438,6 +5516,13 @@ arc_fini(void)
54385516
mutex_destroy(&arc_user_evicts_lock);
54395517
cv_destroy(&arc_user_evicts_cv);
54405518

5519+
refcount_destroy(&arc_anon->arcs_size);
5520+
refcount_destroy(&arc_mru->arcs_size);
5521+
refcount_destroy(&arc_mru_ghost->arcs_size);
5522+
refcount_destroy(&arc_mfu->arcs_size);
5523+
refcount_destroy(&arc_mfu_ghost->arcs_size);
5524+
refcount_destroy(&arc_l2c_only->arcs_size);
5525+
54415526
multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
54425527
multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
54435528
multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);

0 commit comments

Comments
 (0)