@@ -20,8 +20,9 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
- * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
  * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  */
 
@@ -132,6 +133,7 @@
 #include <sys/zio_compress.h>
 #include <sys/zfs_context.h>
 #include <sys/arc.h>
+#include <sys/refcount.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/dsl_pool.h>
@@ -1553,12 +1555,75 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
 		buf_hash_remove(hdr);
 
 	/* adjust state sizes (ignore arc_l2c_only) */
-	if (to_delta && new_state != arc_l2c_only)
-		atomic_add_64(&new_state->arcs_size, to_delta);
+
+	if (to_delta && new_state != arc_l2c_only) {
+		ASSERT(HDR_HAS_L1HDR(hdr));
+		if (GHOST_STATE(new_state)) {
+			ASSERT0(datacnt);
+
+			/*
+			 * When moving a header to a ghost state, we first
+			 * remove all arc buffers. Thus, we'll have a
+			 * datacnt of zero, and no arc buffer to use for
+			 * the reference. As a result, we use the arc
+			 * header pointer for the reference.
+			 */
+			(void) refcount_add_many(&new_state->arcs_size,
+			    hdr->b_size, hdr);
+		} else {
+			arc_buf_t *buf;
+			ASSERT3U(datacnt, !=, 0);
+
+			/*
+			 * Each individual buffer holds a unique reference,
+			 * thus we must add each of these references one
+			 * at a time.
+			 */
+			for (buf = hdr->b_l1hdr.b_buf; buf != NULL;
+			    buf = buf->b_next) {
+				(void) refcount_add_many(&new_state->arcs_size,
+				    hdr->b_size, buf);
+			}
+		}
+	}
+
 	if (from_delta && old_state != arc_l2c_only) {
-		ASSERT3U(old_state->arcs_size, >=, from_delta);
-		atomic_add_64(&old_state->arcs_size, -from_delta);
+		ASSERT(HDR_HAS_L1HDR(hdr));
+		if (GHOST_STATE(old_state)) {
+			/*
+			 * When moving a header off of a ghost state,
+			 * there's the possibility for datacnt to be
+			 * non-zero. This is because we first add the
+			 * arc buffer to the header prior to changing
+			 * the header's state. Since we used the header
+			 * for the reference when putting the header on
+			 * the ghost state, we must balance that and use
+			 * the header when removing it from the ghost state
+			 * (even though datacnt is non-zero).
+			 */
+
+			IMPLY(datacnt == 0, new_state == arc_anon ||
+			    new_state == arc_l2c_only);
+
+			(void) refcount_remove_many(&old_state->arcs_size,
+			    hdr->b_size, hdr);
+		} else {
+			arc_buf_t *buf;
+			ASSERT3U(datacnt, !=, 0);
+
+			/*
+			 * Each individual buffer holds a unique reference,
+			 * thus we must remove each of these references one
+			 * at a time.
+			 */
+			for (buf = hdr->b_l1hdr.b_buf; buf != NULL;
+			    buf = buf->b_next) {
+				(void) refcount_remove_many(
+				    &old_state->arcs_size, hdr->b_size, buf);
+			}
+		}
 	}
+
 	if (HDR_HAS_L1HDR(hdr))
 		hdr->b_l1hdr.b_state = new_state;
 
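The accounting rule in the hunk above is the heart of the change: once a header moves to a ghost state its data buffers are gone, so the header itself holds the single reference against `arcs_size`, while in a non-ghost state each arc buffer holds its own reference. Below is a minimal userland sketch of that pattern; the `demo_*` names are illustrative stand-ins rather than anything from the patch, and the stub refcount mirrors the non-debug behaviour of `sys/refcount.h`, where the counter is plain and the holder tag is only tracked by debug builds.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for refcount_t; non-debug builds reduce it to a plain counter. */
typedef struct demo_refcount { uint64_t rc_count; } demo_refcount_t;

static void demo_refcount_create(demo_refcount_t *rc) { rc->rc_count = 0; }
static uint64_t demo_refcount_count(demo_refcount_t *rc) { return (rc->rc_count); }

/* The "holder" tag is only recorded by debug builds of the real refcount_t. */
static void
demo_refcount_add_many(demo_refcount_t *rc, uint64_t n, void *holder)
{
	(void) holder;
	rc->rc_count += n;
}

static void
demo_refcount_remove_many(demo_refcount_t *rc, uint64_t n, void *holder)
{
	(void) holder;
	assert(rc->rc_count >= n);
	rc->rc_count -= n;
}

typedef struct demo_buf { struct demo_buf *b_next; } demo_buf_t;
typedef struct demo_hdr { uint64_t b_size; demo_buf_t *b_buf; } demo_hdr_t;
typedef struct demo_state { demo_refcount_t arcs_size; int ghost; } demo_state_t;

/*
 * Mirrors the rule in the arc_change_state() hunk above: a ghost state has
 * no buffers left, so the header itself is the single reference holder; a
 * non-ghost state takes one reference per arc buffer.
 */
static void
demo_charge_state(demo_state_t *st, demo_hdr_t *hdr)
{
	if (st->ghost) {
		demo_refcount_add_many(&st->arcs_size, hdr->b_size, hdr);
	} else {
		for (demo_buf_t *b = hdr->b_buf; b != NULL; b = b->b_next)
			demo_refcount_add_many(&st->arcs_size, hdr->b_size, b);
	}
}

int
main(void)
{
	demo_state_t mru = { .ghost = 0 }, mru_ghost = { .ghost = 1 };
	demo_buf_t b2 = { NULL }, b1 = { &b2 };
	demo_hdr_t hdr = { .b_size = 4096, .b_buf = &b1 };

	demo_refcount_create(&mru.arcs_size);
	demo_refcount_create(&mru_ghost.arcs_size);

	demo_charge_state(&mru, &hdr);		/* two buffers: 2 * 4096 */
	demo_charge_state(&mru_ghost, &hdr);	/* ghost: 4096, held by hdr */

	printf("mru=%llu mru_ghost=%llu\n",
	    (unsigned long long)demo_refcount_count(&mru.arcs_size),
	    (unsigned long long)demo_refcount_count(&mru_ghost.arcs_size));

	/* Undo the ghost charge the same way the patch does: header as holder. */
	demo_refcount_remove_many(&mru_ghost.arcs_size, hdr.b_size, &hdr);
	assert(demo_refcount_count(&mru_ghost.arcs_size) == 0);
	return (0);
}
```

The payoff of carrying a holder tag shows up in debug builds: an unbalanced `refcount_remove_many()` (wrong holder, wrong size, or a double remove) can trip a reference-tracking assertion at the call site, instead of silently skewing `arcs_size` the way a mismatched `atomic_add_64()` could.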
@@ -1925,8 +1990,8 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t remove)
 			ASSERT3U(*cnt, >=, size);
 			atomic_add_64(cnt, -size);
 		}
-		ASSERT3U(state->arcs_size, >=, size);
-		atomic_add_64(&state->arcs_size, -size);
+
+		(void) refcount_remove_many(&state->arcs_size, size, buf);
 		buf->b_data = NULL;
 
 		/*
@@ -2804,7 +2869,8 @@ arc_adjust_meta_only(void)
 	 * evict some from the MRU here, and some from the MFU below.
 	 */
 	target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
-	    (int64_t)(arc_anon->arcs_size + arc_mru->arcs_size - arc_p));
+	    (int64_t)(refcount_count(&arc_anon->arcs_size) +
+	    refcount_count(&arc_mru->arcs_size) - arc_p));
 
 	total_evicted += arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
 
@@ -2814,7 +2880,7 @@ arc_adjust_meta_only(void)
 	 * space alloted to the MFU (which is defined as arc_c - arc_p).
 	 */
 	target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
-	    (int64_t)(arc_mfu->arcs_size - (arc_c - arc_p)));
+	    (int64_t)(refcount_count(&arc_mfu->arcs_size) - (arc_c - arc_p)));
 
 	total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
 
@@ -2932,8 +2998,8 @@ arc_adjust(void)
 	 * arc_p here, and then evict more from the MFU below.
 	 */
 	target = MIN((int64_t)(arc_size - arc_c),
-	    (int64_t)(arc_anon->arcs_size + arc_mru->arcs_size + arc_meta_used -
-	    arc_p));
+	    (int64_t)(refcount_count(&arc_anon->arcs_size) +
+	    refcount_count(&arc_mru->arcs_size) + arc_meta_used - arc_p));
 
 	/*
 	 * If we're below arc_meta_min, always prefer to evict data.
@@ -3017,7 +3083,8 @@ arc_adjust(void)
 	 * cache. The following logic enforces these limits on the ghost
 	 * caches, and evicts from them as needed.
 	 */
-	target = arc_mru->arcs_size + arc_mru_ghost->arcs_size - arc_c;
+	target = refcount_count(&arc_mru->arcs_size) +
+	    refcount_count(&arc_mru_ghost->arcs_size) - arc_c;
 
 	bytes = arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_DATA);
 	total_evicted += bytes;
@@ -3035,7 +3102,8 @@ arc_adjust(void)
 	 *	mru + mfu + mru ghost + mfu ghost <= 2 * arc_c
 	 *	mru ghost + mfu ghost <= arc_c
 	 */
-	target = arc_mru_ghost->arcs_size + arc_mfu_ghost->arcs_size - arc_c;
+	target = refcount_count(&arc_mru_ghost->arcs_size) +
+	    refcount_count(&arc_mfu_ghost->arcs_size) - arc_c;
 
 	bytes = arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_DATA);
 	total_evicted += bytes;
@@ -3650,6 +3718,8 @@ static void
 arc_adapt(int bytes, arc_state_t *state)
 {
 	int mult;
+	int64_t mrug_size = refcount_count(&arc_mru_ghost->arcs_size);
+	int64_t mfug_size = refcount_count(&arc_mfu_ghost->arcs_size);
 
 	if (state == arc_l2c_only)
 		return;
@@ -3664,19 +3734,15 @@ arc_adapt(int bytes, arc_state_t *state)
 	 * target size of the MRU list.
 	 */
 	if (state == arc_mru_ghost) {
-		mult = ((arc_mru_ghost->arcs_size >= arc_mfu_ghost->arcs_size) ?
-		    1 : (arc_mfu_ghost->arcs_size / arc_mru_ghost->arcs_size));
-
+		mult = (mrug_size >= mfug_size) ? 1 : (mfug_size / mrug_size);
 		if (!zfs_arc_p_dampener_disable)
 			mult = MIN(mult, 10); /* avoid wild arc_p adjustment */
 
 		arc_p = MIN(arc_c, arc_p + bytes * mult);
 	} else if (state == arc_mfu_ghost) {
 		uint64_t delta;
 
-		mult = ((arc_mfu_ghost->arcs_size >= arc_mru_ghost->arcs_size) ?
-		    1 : (arc_mru_ghost->arcs_size / arc_mfu_ghost->arcs_size));
-
+		mult = (mfug_size >= mrug_size) ? 1 : (mrug_size / mfug_size);
 		if (!zfs_arc_p_dampener_disable)
 			mult = MIN(mult, 10);
 
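In the arc_adapt() hunks above, only the way the ghost-list sizes are read changes (they now go through refcount_count() instead of touching arcs_size directly); the growth and dampening arithmetic for arc_p is unchanged. A small standalone example of that arithmetic, using made-up sizes that are not taken from the patch:

```c
#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int
main(void)
{
	/* All sizes below are invented for illustration only. */
	int64_t mrug_size = 512LL << 20;	/* 512 MB on the MRU ghost list */
	int64_t mfug_size = 6LL << 30;		/* 6 GB on the MFU ghost list */
	uint64_t arc_c = 8ULL << 30;		/* 8 GB overall ARC target */
	uint64_t arc_p = 2ULL << 30;		/* 2 GB currently aimed at MRU */
	int bytes = 128 << 10;			/* size of the buffer that hit */

	/* Same computation as the arc_mru_ghost branch in the hunk above. */
	int mult = (mrug_size >= mfug_size) ? 1 : (int)(mfug_size / mrug_size);
	mult = MIN(mult, 10);	/* dampener: avoid wild arc_p adjustment */
	arc_p = MIN(arc_c, arc_p + (uint64_t)(bytes * mult));

	printf("mult=%d new arc_p=%llu\n", mult, (unsigned long long)arc_p);
	return (0);
}
```

With the MFU ghost list twelve times the size of the MRU ghost list, the dampener caps the multiplier at 10, so a 128 KB ghost hit nudges arc_p up by 1.25 MB rather than 1.5 MB.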
@@ -3794,8 +3860,9 @@ arc_get_data_buf(arc_buf_t *buf)
 	 */
 	if (!GHOST_STATE(buf->b_hdr->b_l1hdr.b_state)) {
 		arc_buf_hdr_t *hdr = buf->b_hdr;
+		arc_state_t *state = hdr->b_l1hdr.b_state;
 
-		atomic_add_64(&hdr->b_l1hdr.b_state->arcs_size, size);
+		(void) refcount_add_many(&state->arcs_size, size, buf);
 
 		/*
 		 * If this is reached via arc_read, the link is
@@ -3816,7 +3883,8 @@ arc_get_data_buf(arc_buf_t *buf)
 		 * data, and we have outgrown arc_p, update arc_p
 		 */
 		if (arc_size < arc_c && hdr->b_l1hdr.b_state == arc_anon &&
-		    arc_anon->arcs_size + arc_mru->arcs_size > arc_p)
+		    (refcount_count(&arc_anon->arcs_size) +
+		    refcount_count(&arc_mru->arcs_size) > arc_p))
 			arc_p = MIN(arc_c, arc_p + size);
 	}
 }
@@ -4728,8 +4796,10 @@ arc_release(arc_buf_t *buf, void *tag)
 			buf->b_next = NULL;
 
 			ASSERT3P(state, !=, arc_l2c_only);
-			ASSERT3U(state->arcs_size, >=, hdr->b_size);
-			atomic_add_64(&state->arcs_size, -hdr->b_size);
+
+			(void) refcount_remove_many(
+			    &state->arcs_size, hdr->b_size, buf);
+
 			if (refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
 				uint64_t *size;
 
@@ -4777,7 +4847,7 @@ arc_release(arc_buf_t *buf, void *tag)
 		(void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
 		buf->b_hdr = nhdr;
 		mutex_exit(&buf->b_evict_lock);
-		atomic_add_64(&arc_anon->arcs_size, blksz);
+		(void) refcount_add_many(&arc_anon->arcs_size, blksz, buf);
 	} else {
 		mutex_exit(&buf->b_evict_lock);
 		ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1);
@@ -5033,7 +5103,8 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
 	 * network delays from blocking transactions that are ready to be
 	 * assigned to a txg.
 	 */
-	anon_size = MAX((int64_t)(arc_anon->arcs_size - arc_loaned_bytes), 0);
+	anon_size = MAX((int64_t)(refcount_count(&arc_anon->arcs_size) -
+	    arc_loaned_bytes), 0);
 
 	/*
 	 * Writes will, almost always, require additional memory allocations
@@ -5071,7 +5142,7 @@ static void
 arc_kstat_update_state(arc_state_t *state, kstat_named_t *size,
     kstat_named_t *evict_data, kstat_named_t *evict_metadata)
 {
-	size->value.ui64 = state->arcs_size;
+	size->value.ui64 = refcount_count(&state->arcs_size);
 	evict_data->value.ui64 = state->arcs_lsize[ARC_BUFC_DATA];
 	evict_metadata->value.ui64 = state->arcs_lsize[ARC_BUFC_METADATA];
 }
@@ -5323,6 +5394,13 @@ arc_init(void)
 	arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
 	arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
 
+	refcount_create(&arc_anon->arcs_size);
+	refcount_create(&arc_mru->arcs_size);
+	refcount_create(&arc_mru_ghost->arcs_size);
+	refcount_create(&arc_mfu->arcs_size);
+	refcount_create(&arc_mfu_ghost->arcs_size);
+	refcount_create(&arc_l2c_only->arcs_size);
+
 	buf_init();
 
 	arc_reclaim_thread_exit = FALSE;
@@ -5438,6 +5516,13 @@ arc_fini(void)
 	mutex_destroy(&arc_user_evicts_lock);
 	cv_destroy(&arc_user_evicts_cv);
 
+	refcount_destroy(&arc_anon->arcs_size);
+	refcount_destroy(&arc_mru->arcs_size);
+	refcount_destroy(&arc_mru_ghost->arcs_size);
+	refcount_destroy(&arc_mfu->arcs_size);
+	refcount_destroy(&arc_mfu_ghost->arcs_size);
+	refcount_destroy(&arc_l2c_only->arcs_size);
+
 	multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
 	multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
 	multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);