|
20 | 20 | */
|
21 | 21 | /*
|
22 | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
23 |
| - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. |
| 23 | + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. |
24 | 24 | * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
25 | 25 | */
|
26 | 26 |
|
@@ -1517,6 +1517,16 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
|
1517 | 1517 | rw_downgrade(&dn->dn_struct_rwlock);
|
1518 | 1518 | }
|
1519 | 1519 |
|
| | +/* |
| | + * Dirty the level-1 (first-level indirect) block at index l1blkid under |
| | + * transaction tx: hold the L1 dbuf, mark it dirty, and release the hold. |
| | + * If the dbuf cannot be held (dbuf_hold_level() returns NULL, e.g. the |
| | + * block does not exist), this is silently a no-op. |
| | + */ |
| 1520 | +static void |
| 1521 | +dnode_dirty_l1(dnode_t *dn, uint64_t l1blkid, dmu_tx_t *tx) |
| 1522 | +{ |
| 1523 | + dmu_buf_impl_t *db = dbuf_hold_level(dn, 1, l1blkid, FTAG); |
| 1524 | + if (db != NULL) { |
| 1525 | + dmu_buf_will_dirty(&db->db, tx); |
| 1526 | + dbuf_rele(db, FTAG); |
| 1527 | + } |
| 1528 | +} |
| 1529 | + |
1520 | 1530 | void
|
1521 | 1531 | dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
1522 | 1532 | {
|
@@ -1637,27 +1647,68 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
|
1637 | 1647 | nblks += 1;
|
1638 | 1648 |
|
1639 | 1649 | /*
|
1640 |
| - * Dirty the first and last indirect blocks, as they (and/or their |
1641 |
| - * parents) will need to be written out if they were only |
1642 |
| - * partially freed. Interior indirect blocks will be themselves freed, |
1643 |
| - * by free_children(), so they need not be dirtied. Note that these |
1644 |
| - * interior blocks have already been prefetched by dmu_tx_hold_free(). |
| 1650 | + * Dirty all the indirect blocks in this range. Note that only |
| 1651 | + * the first and last indirect blocks can actually be written |
| 1652 | + * (if they were partially freed) -- they must be dirtied, even if |
| 1653 | + * they do not exist on disk yet. The interior blocks will |
| 1654 | + * be freed by free_children(), so they will not actually be written. |
| 1655 | + * Even though these interior blocks will not be written, we |
| 1656 | + * dirty them for two reasons: |
| 1657 | + * |
| 1658 | + * - It ensures that the indirect blocks remain in memory until |
| 1659 | + * syncing context. (They have already been prefetched by |
| 1660 | + * dmu_tx_hold_free(), so we don't have to worry about reading |
| 1661 | + * them serially here.) |
| 1662 | + * |
| 1663 | + * - The dirty space accounting will put pressure on the txg sync |
| 1664 | + * mechanism to begin syncing, and to delay transactions if there |
| 1665 | + * is a large amount of freeing. Even though these indirect |
| 1666 | + * blocks will not be written, we could need to write the same |
| 1667 | + * amount of space if we copy the freed BPs into deadlists. |
1645 | 1668 | */
|
1646 | 1669 | if (dn->dn_nlevels > 1) {
|
1647 |
| - uint64_t first, last; |
| 1670 | + uint64_t first, last, i, ibyte; |
| 1671 | + int shift, err; |
1648 | 1672 |
|
1649 | 1673 | first = blkid >> epbs;
|
1650 |
| - if ((db = dbuf_hold_level(dn, 1, first, FTAG))) { |
1651 |
| - dmu_buf_will_dirty(&db->db, tx); |
1652 |
| - dbuf_rele(db, FTAG); |
1653 |
| - } |
| 1674 | + dnode_dirty_l1(dn, first, tx); |
1654 | 1675 | if (trunc)
|
1655 | 1676 | last = dn->dn_maxblkid >> epbs;
|
1656 | 1677 | else
|
1657 | 1678 | last = (blkid + nblks - 1) >> epbs;
|
1658 |
| - if (last > first && (db = dbuf_hold_level(dn, 1, last, FTAG))) { |
1659 |
| - dmu_buf_will_dirty(&db->db, tx); |
1660 |
| - dbuf_rele(db, FTAG); |
| 1679 | + if (last != first) |
| 1680 | + dnode_dirty_l1(dn, last, tx); |
| 1681 | + |
| 1682 | + shift = dn->dn_datablkshift + dn->dn_indblkshift - |
| 1683 | + SPA_BLKPTRSHIFT; |
| 1684 | + for (i = first + 1; i < last; i++) { |
| 1685 | + /* |
| 1686 | + * Set i to the blockid of the next non-hole |
| 1687 | + * level-1 indirect block at or after i. Note |
| 1688 | + * that dnode_next_offset() operates in terms of |
| 1689 | + * level-0-equivalent bytes. |
| 1690 | + */ |
| 1691 | + ibyte = i << shift; |
| 1692 | + err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK, |
| 1693 | + &ibyte, 2, 1, 0); |
| 1694 | + i = ibyte >> shift; |
| 1695 | + if (i >= last) |
| 1696 | + break; |
| 1697 | + |
| 1698 | + /* |
| 1699 | + * Normally we should not see an error, either |
| 1700 | + * from dnode_next_offset() or dbuf_hold_level() |
| 1701 | + * (except for ESRCH from dnode_next_offset). |
| 1702 | + * If there is an i/o error, then when we read |
| 1703 | + * this block in syncing context, it will use |
| 1704 | + * ZIO_FLAG_MUSTSUCCEED, and thus hang/panic according |
| 1705 | + * to the "failmode" property. dnode_next_offset() |
| 1706 | + * doesn't have a flag to indicate MUSTSUCCEED. |
| 1707 | + */ |
| 1708 | + if (err != 0) |
| 1709 | + break; |
| 1710 | + |
| 1711 | + dnode_dirty_l1(dn, i, tx); |
1661 | 1712 | }
|
1662 | 1713 | }
|
1663 | 1714 |
|
|
0 commit comments