Skip to content

Commit 9925c28

Browse files
Alex Reece authored and behlendorf committed
Illumos 5095 - panic when adding a duplicate dbuf to dn_dbufs
5095 panic when adding a duplicate dbuf to dn_dbufs Author: Alex Reece <alex@delphix.com> Reviewed by: Adam Leventhal <adam.leventhal@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Dan Kimmel <dan.kimmel@delphix.com> Reviewed by: Dan McDonald <danmcd@omniti.com> Reviewed by: Josef Sipek <jeffpc@josefsipek.net> Approved by: Robert Mustacchi <rm@joyent.com> References: https://www.illumos.org/issues/5095 illumos/illumos-gate@86bb58a Ported-by: Chris Dunlop <chris@onthe.net.au> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
1 parent 5aea364 commit 9925c28

File tree

4 files changed

+36
-23
lines changed

4 files changed

+36
-23
lines changed

include/sys/dbuf.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,13 @@ extern "C" {
6666
* | |
6767
* | |
6868
* +--------> NOFILL -------+
69+
*
70+
* DB_SEARCH is an invalid state for a dbuf. It is used by dbuf_free_range
71+
* to find all dbufs in a range of a dnode and must be less than any other
72+
* dbuf_states_t (see comment on dn_dbufs in dnode.h).
6973
*/
7074
typedef enum dbuf_states {
75+
DB_SEARCH = -1,
7176
DB_UNCACHED,
7277
DB_FILL,
7378
DB_NOFILL,
@@ -213,9 +218,6 @@ typedef struct dmu_buf_impl {
213218
/* pointer to most recent dirty record for this buffer */
214219
dbuf_dirty_record_t *db_last_dirty;
215220

216-
/* Creation time of dbuf (see comment in dbuf_compare). */
217-
hrtime_t db_creation;
218-
219221
/*
220222
* Our link on the owner dnode's dn_dbufs list.
221223
* Protected by its dn_dbufs_mtx.

include/sys/dnode.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,18 @@ typedef struct dnode {
233233
refcount_t dn_holds;
234234

235235
kmutex_t dn_dbufs_mtx;
236-
avl_tree_t dn_dbufs; /* descendent dbufs */
236+
/*
237+
* Descendent dbufs, ordered by dbuf_compare. Note that dn_dbufs
238+
* can contain multiple dbufs of the same (level, blkid) when a
239+
* dbuf is marked DB_EVICTING without being removed from
240+
* dn_dbufs. To maintain the avl invariant that there cannot be
241+
* duplicate entries, we order the dbufs by an arbitrary value -
242+
* their address in memory. This means that dn_dbufs cannot be used to
243+
* directly look up a dbuf. Instead, callers must use avl_walk, have
244+
* a reference to the dbuf, or look up a non-existent node with
245+
* db_state = DB_SEARCH (see dbuf_free_range for an example).
246+
*/
247+
avl_tree_t dn_dbufs;
237248

238249
/* protected by dn_struct_rwlock */
239250
struct dmu_buf_impl *dn_bonus; /* bonus buffer dbuf */

module/zfs/dbuf.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,6 @@ dbuf_cons(void *vdb, void *unused, int kmflag)
9494
cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
9595
refcount_create(&db->db_holds);
9696

97-
db->db_creation = gethrtime();
98-
9997
return (0);
10098
}
10199

@@ -884,7 +882,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
884882
db_search = kmem_alloc(sizeof (dmu_buf_impl_t), KM_SLEEP);
885883
db_search->db_level = 0;
886884
db_search->db_blkid = start_blkid;
887-
db_search->db_creation = 0;
885+
db_search->db_state = DB_SEARCH;
888886

889887
mutex_enter(&dn->dn_dbufs_mtx);
890888
if (start_blkid >= dn->dn_unlisted_l0_blkid && !freespill) {

module/zfs/dnode.c

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -70,33 +70,35 @@ dbuf_compare(const void *x1, const void *x2)
7070

7171
if (d1->db_level < d2->db_level) {
7272
return (-1);
73-
} else if (d1->db_level > d2->db_level) {
73+
}
74+
if (d1->db_level > d2->db_level) {
7475
return (1);
7576
}
7677

7778
if (d1->db_blkid < d2->db_blkid) {
7879
return (-1);
79-
} else if (d1->db_blkid > d2->db_blkid) {
80+
}
81+
if (d1->db_blkid > d2->db_blkid) {
8082
return (1);
8183
}
8284

83-
/*
84-
* If a dbuf is being evicted while dn_dbufs_mutex is not held, we set
85-
* the db_state to DB_EVICTING but do not remove it from dn_dbufs. If
86-
* another thread creates a dbuf of the same blkid before the dbuf is
87-
* removed from dn_dbufs, we can reach a state where there are two
88-
* dbufs of the same blkid and level in db_dbufs. To maintain the avl
89-
* invariant that there cannot be duplicate items, we distinguish
90-
* between these two dbufs based on the time they were created.
91-
*/
92-
if (d1->db_creation < d2->db_creation) {
85+
if (d1->db_state < d2->db_state) {
9386
return (-1);
94-
} else if (d1->db_creation > d2->db_creation) {
87+
}
88+
if (d1->db_state > d2->db_state) {
9589
return (1);
96-
} else {
97-
ASSERT3P(d1, ==, d2);
98-
return (0);
9990
}
91+
92+
ASSERT3S(d1->db_state, !=, DB_SEARCH);
93+
ASSERT3S(d2->db_state, !=, DB_SEARCH);
94+
95+
if ((uintptr_t)d1 < (uintptr_t)d2) {
96+
return (-1);
97+
}
98+
if ((uintptr_t)d1 > (uintptr_t)d2) {
99+
return (1);
100+
}
101+
return (0);
100102
}
101103

102104
/* ARGSUSED */

0 commit comments

Comments
 (0)