Skip to content

Commit 9bd274d

Browse files
ahrens authored and behlendorf committed
Illumos #4374
4374 dn_free_ranges should use range_tree_t Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Max Grossman <max.grossman@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Garrett D'Amore <garrett@damore.org> Reviewed by: Dan McDonald <danmcd@omniti.com> Approved by: Dan McDonald <danmcd@omniti.com> References: https://www.illumos.org/issues/4374 illumos/illumos-gate@bf16b11 Ported by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2531
1 parent da53684 commit 9bd274d

File tree

19 files changed

+128
-174
lines changed

19 files changed

+128
-174
lines changed

cmd/zdb/zdb.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24-
* Copyright (c) 2013 by Delphix. All rights reserved.
24+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2525
*/
2626

2727
#include <stdio.h>
@@ -2788,7 +2788,8 @@ dump_simulated_ddt(spa_t *spa)
27882788
dds.dds_ref_psize = zdde->zdde_ref_psize;
27892789
dds.dds_ref_dsize = zdde->zdde_ref_dsize;
27902790

2791-
ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
2791+
ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
2792+
&dds, 0);
27922793

27932794
umem_free(zdde, sizeof (*zdde));
27942795
}

cmd/zpool/zpool_vdev.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,7 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
740740
int sector_size;
741741

742742
if (check_sector_size_database(path, &sector_size) == B_TRUE)
743-
ashift = highbit(sector_size) - 1;
743+
ashift = highbit64(sector_size) - 1;
744744
}
745745

746746
if (ashift > 0)

include/sys/dnode.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2013 by Delphix. All rights reserved.
23+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2424
*/
2525

2626
#ifndef _SYS_DNODE_H
@@ -220,7 +220,7 @@ typedef struct dnode {
220220
/* protected by dn_mtx: */
221221
kmutex_t dn_mtx;
222222
list_t dn_dirty_records[TXG_SIZE];
223-
avl_tree_t dn_ranges[TXG_SIZE];
223+
struct range_tree *dn_free_ranges[TXG_SIZE];
224224
uint64_t dn_allocated_txg;
225225
uint64_t dn_free_txg;
226226
uint64_t dn_assigned_txg;
@@ -302,8 +302,6 @@ void dnode_buf_byteswap(void *buf, size_t size);
302302
void dnode_verify(dnode_t *dn);
303303
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
304304
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
305-
void dnode_clear_range(dnode_t *dn, uint64_t blkid,
306-
uint64_t nblks, dmu_tx_t *tx);
307305
void dnode_diduse_space(dnode_t *dn, int64_t space);
308306
void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
309307
void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);

include/sys/range_tree.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
*/
2525

2626
/*
27-
* Copyright (c) 2013 by Delphix. All rights reserved.
27+
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
2828
*/
2929

3030
#ifndef _SYS_RANGE_TREE_H
@@ -85,6 +85,7 @@ void range_tree_stat_verify(range_tree_t *rt);
8585

8686
void range_tree_add(void *arg, uint64_t start, uint64_t size);
8787
void range_tree_remove(void *arg, uint64_t start, uint64_t size);
88+
void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size);
8889

8990
void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg);
9091
void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);

include/sys/zfs_context.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
/*
2626
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
2727
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
28-
* Copyright (c) 2013 by Delphix. All rights reserved.
28+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2929
*/
3030

3131
#ifndef _SYS_ZFS_CONTEXT_H
@@ -623,7 +623,7 @@ extern void delay(clock_t ticks);
623623

624624
extern uint64_t physmem;
625625

626-
extern int highbit(ulong_t i);
626+
extern int highbit64(uint64_t i);
627627
extern int random_get_bytes(uint8_t *ptr, size_t len);
628628
extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
629629

lib/libzpool/kernel.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,17 +1014,15 @@ delay(clock_t ticks)
10141014
* High order bit is 63.
10151015
*/
10161016
int
1017-
highbit(ulong_t i)
1017+
highbit64(uint64_t i)
10181018
{
10191019
register int h = 1;
10201020

10211021
if (i == 0)
10221022
return (0);
1023-
#ifdef _LP64
1024-
if (i & 0xffffffff00000000ul) {
1023+
if (i & 0xffffffff00000000ULL) {
10251024
h += 32; i >>= 32;
10261025
}
1027-
#endif
10281026
if (i & 0xffff0000) {
10291027
h += 16; i >>= 16;
10301028
}

module/zfs/dbuf.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2323
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24-
* Copyright (c) 2013 by Delphix. All rights reserved.
24+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2525
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
2626
*/
2727

@@ -40,6 +40,7 @@
4040
#include <sys/dmu_zfetch.h>
4141
#include <sys/sa.h>
4242
#include <sys/sa_impl.h>
43+
#include <sys/range_tree.h>
4344

4445
struct dbuf_hold_impl_data {
4546
/* Function arguments */
@@ -1234,7 +1235,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
12341235
if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
12351236
db->db_blkid != DMU_SPILL_BLKID) {
12361237
mutex_enter(&dn->dn_mtx);
1237-
dnode_clear_range(dn, db->db_blkid, 1, tx);
1238+
if (dn->dn_free_ranges[txgoff] != NULL) {
1239+
range_tree_clear(dn->dn_free_ranges[txgoff],
1240+
db->db_blkid, 1);
1241+
}
12381242
mutex_exit(&dn->dn_mtx);
12391243
db->db_freed_in_flight = FALSE;
12401244
}

module/zfs/ddt.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24-
* Copyright (c) 2013 by Delphix. All rights reserved.
24+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2525
*/
2626

2727
#include <sys/zfs_context.h>
@@ -423,7 +423,7 @@ ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
423423

424424
ddt_stat_generate(ddt, dde, &dds);
425425

426-
bucket = highbit(dds.dds_ref_blocks) - 1;
426+
bucket = highbit64(dds.dds_ref_blocks) - 1;
427427
ASSERT(bucket >= 0);
428428

429429
ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];

module/zfs/dnode.c

Lines changed: 20 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23-
* Copyright (c) 2013 by Delphix. All rights reserved.
23+
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
2424
*/
2525

2626
#include <sys/zfs_context.h>
@@ -35,8 +35,7 @@
3535
#include <sys/spa.h>
3636
#include <sys/zio.h>
3737
#include <sys/dmu_zfetch.h>
38-
39-
static int free_range_compar(const void *node1, const void *node2);
38+
#include <sys/range_tree.h>
4039

4140
static kmem_cache_t *dnode_cache;
4241
/*
@@ -92,9 +91,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
9291

9392
for (i = 0; i < TXG_SIZE; i++) {
9493
list_link_init(&dn->dn_dirty_link[i]);
95-
avl_create(&dn->dn_ranges[i], free_range_compar,
96-
sizeof (free_range_t),
97-
offsetof(struct free_range, fr_node));
94+
dn->dn_free_ranges[i] = NULL;
9895
list_create(&dn->dn_dirty_records[i],
9996
sizeof (dbuf_dirty_record_t),
10097
offsetof(dbuf_dirty_record_t, dr_dirty_node));
@@ -142,7 +139,7 @@ dnode_dest(void *arg, void *unused)
142139

143140
for (i = 0; i < TXG_SIZE; i++) {
144141
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
145-
avl_destroy(&dn->dn_ranges[i]);
142+
ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
146143
list_destroy(&dn->dn_dirty_records[i]);
147144
ASSERT0(dn->dn_next_nblkptr[i]);
148145
ASSERT0(dn->dn_next_nlevels[i]);
@@ -313,19 +310,6 @@ dnode_buf_byteswap(void *vbuf, size_t size)
313310
}
314311
}
315312

316-
static int
317-
free_range_compar(const void *node1, const void *node2)
318-
{
319-
const free_range_t *rp1 = node1;
320-
const free_range_t *rp2 = node2;
321-
322-
if (rp1->fr_blkid < rp2->fr_blkid)
323-
return (-1);
324-
else if (rp1->fr_blkid > rp2->fr_blkid)
325-
return (1);
326-
else return (0);
327-
}
328-
329313
void
330314
dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
331315
{
@@ -374,7 +358,7 @@ dnode_setdblksz(dnode_t *dn, int size)
374358
1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
375359
dn->dn_datablksz = size;
376360
dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
377-
dn->dn_datablkshift = ISP2(size) ? highbit(size - 1) : 0;
361+
dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0;
378362
}
379363

380364
static dnode_t *
@@ -530,7 +514,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
530514
ASSERT0(dn->dn_next_blksz[i]);
531515
ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
532516
ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
533-
ASSERT0(avl_numnodes(&dn->dn_ranges[i]));
517+
ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
534518
}
535519

536520
dn->dn_type = ot;
@@ -695,7 +679,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
695679
list_move_tail(&ndn->dn_dirty_records[i],
696680
&odn->dn_dirty_records[i]);
697681
}
698-
bcopy(&odn->dn_ranges[0], &ndn->dn_ranges[0], sizeof (odn->dn_ranges));
682+
bcopy(&odn->dn_free_ranges[0], &ndn->dn_free_ranges[0],
683+
sizeof (odn->dn_free_ranges));
699684
ndn->dn_allocated_txg = odn->dn_allocated_txg;
700685
ndn->dn_free_txg = odn->dn_free_txg;
701686
ndn->dn_assigned_txg = odn->dn_assigned_txg;
@@ -758,8 +743,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
758743
list_create(&odn->dn_dirty_records[i],
759744
sizeof (dbuf_dirty_record_t),
760745
offsetof(dbuf_dirty_record_t, dr_dirty_node));
761-
odn->dn_ranges[i].avl_root = NULL;
762-
odn->dn_ranges[i].avl_numnodes = 0;
746+
odn->dn_free_ranges[i] = NULL;
763747
odn->dn_next_nlevels[i] = 0;
764748
odn->dn_next_indblkshift[i] = 0;
765749
odn->dn_next_bonustype[i] = 0;
@@ -1462,59 +1446,6 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
14621446
rw_downgrade(&dn->dn_struct_rwlock);
14631447
}
14641448

1465-
void
1466-
dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
1467-
{
1468-
avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK];
1469-
avl_index_t where;
1470-
free_range_t *rp;
1471-
free_range_t rp_tofind;
1472-
uint64_t endblk = blkid + nblks;
1473-
1474-
ASSERT(MUTEX_HELD(&dn->dn_mtx));
1475-
ASSERT(nblks <= UINT64_MAX - blkid); /* no overflow */
1476-
1477-
dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
1478-
blkid, nblks, tx->tx_txg);
1479-
rp_tofind.fr_blkid = blkid;
1480-
rp = avl_find(tree, &rp_tofind, &where);
1481-
if (rp == NULL)
1482-
rp = avl_nearest(tree, where, AVL_BEFORE);
1483-
if (rp == NULL)
1484-
rp = avl_nearest(tree, where, AVL_AFTER);
1485-
1486-
while (rp && (rp->fr_blkid <= blkid + nblks)) {
1487-
uint64_t fr_endblk = rp->fr_blkid + rp->fr_nblks;
1488-
free_range_t *nrp = AVL_NEXT(tree, rp);
1489-
1490-
if (blkid <= rp->fr_blkid && endblk >= fr_endblk) {
1491-
/* clear this entire range */
1492-
avl_remove(tree, rp);
1493-
kmem_free(rp, sizeof (free_range_t));
1494-
} else if (blkid <= rp->fr_blkid &&
1495-
endblk > rp->fr_blkid && endblk < fr_endblk) {
1496-
/* clear the beginning of this range */
1497-
rp->fr_blkid = endblk;
1498-
rp->fr_nblks = fr_endblk - endblk;
1499-
} else if (blkid > rp->fr_blkid && blkid < fr_endblk &&
1500-
endblk >= fr_endblk) {
1501-
/* clear the end of this range */
1502-
rp->fr_nblks = blkid - rp->fr_blkid;
1503-
} else if (blkid > rp->fr_blkid && endblk < fr_endblk) {
1504-
/* clear a chunk out of this range */
1505-
free_range_t *new_rp =
1506-
kmem_alloc(sizeof (free_range_t), KM_PUSHPAGE);
1507-
1508-
new_rp->fr_blkid = endblk;
1509-
new_rp->fr_nblks = fr_endblk - endblk;
1510-
avl_insert_here(tree, new_rp, rp, AVL_AFTER);
1511-
rp->fr_nblks = blkid - rp->fr_blkid;
1512-
}
1513-
/* there may be no overlap */
1514-
rp = nrp;
1515-
}
1516-
}
1517-
15181449
void
15191450
dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
15201451
{
@@ -1665,22 +1596,17 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
16651596
* We will finish up this free operation in the syncing phase.
16661597
*/
16671598
mutex_enter(&dn->dn_mtx);
1668-
dnode_clear_range(dn, blkid, nblks, tx);
16691599
{
1670-
free_range_t *rp, *found;
1671-
avl_index_t where;
1672-
avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK];
1673-
1674-
/* Add new range to dn_ranges */
1675-
rp = kmem_alloc(sizeof (free_range_t), KM_PUSHPAGE);
1676-
rp->fr_blkid = blkid;
1677-
rp->fr_nblks = nblks;
1678-
found = avl_find(tree, rp, &where);
1679-
ASSERT(found == NULL);
1680-
avl_insert(tree, rp, where);
1681-
dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
1682-
blkid, nblks, tx->tx_txg);
1600+
int txgoff = tx->tx_txg & TXG_MASK;
1601+
if (dn->dn_free_ranges[txgoff] == NULL) {
1602+
dn->dn_free_ranges[txgoff] =
1603+
range_tree_create(NULL, NULL, &dn->dn_mtx);
1604+
}
1605+
range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
1606+
range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
16831607
}
1608+
dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
1609+
blkid, nblks, tx->tx_txg);
16841610
mutex_exit(&dn->dn_mtx);
16851611

16861612
dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
@@ -1708,7 +1634,6 @@ dnode_spill_freed(dnode_t *dn)
17081634
uint64_t
17091635
dnode_block_freed(dnode_t *dn, uint64_t blkid)
17101636
{
1711-
free_range_t range_tofind;
17121637
void *dp = spa_get_dsl(dn->dn_objset->os_spa);
17131638
int i;
17141639

@@ -1728,20 +1653,10 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
17281653
if (blkid == DMU_SPILL_BLKID)
17291654
return (dnode_spill_freed(dn));
17301655

1731-
range_tofind.fr_blkid = blkid;
17321656
mutex_enter(&dn->dn_mtx);
17331657
for (i = 0; i < TXG_SIZE; i++) {
1734-
free_range_t *range_found;
1735-
avl_index_t idx;
1736-
1737-
range_found = avl_find(&dn->dn_ranges[i], &range_tofind, &idx);
1738-
if (range_found) {
1739-
ASSERT(range_found->fr_nblks > 0);
1740-
break;
1741-
}
1742-
range_found = avl_nearest(&dn->dn_ranges[i], idx, AVL_BEFORE);
1743-
if (range_found &&
1744-
range_found->fr_blkid + range_found->fr_nblks > blkid)
1658+
if (dn->dn_free_ranges[i] != NULL &&
1659+
range_tree_contains(dn->dn_free_ranges[i], blkid, 1))
17451660
break;
17461661
}
17471662
mutex_exit(&dn->dn_mtx);

0 commit comments

Comments
 (0)