Skip to content

Commit 29809a6

Browse files
ahrens and behlendorf
authored and committed
Illumos #3086: unnecessarily setting DS_FLAG_INCONSISTENT on async
3086 unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets
Reviewed by: Christopher Siden <chris.siden@delphix.com>
Approved by: Eric Schrock <Eric.Schrock@delphix.com>
References:
  illumos/illumos-gate@ce636f8
  illumos changeset: 13776:cd512c80fd75
  https://www.illumos.org/issues/3086
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
1 parent b9b24bb commit 29809a6

File tree

12 files changed

+209
-115
lines changed

12 files changed

+209
-115
lines changed

cmd/ztest/ztest.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2277,6 +2277,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
22772277
{
22782278
objset_t *os = zd->zd_os;
22792279

2280+
mutex_enter(&zd->zd_dirobj_lock);
22802281
(void) rw_enter(&zd->zd_zilog_lock, RW_WRITER);
22812282

22822283
/* zfs_sb_teardown() */
@@ -2287,6 +2288,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
22872288
zil_replay(os, zd, ztest_replay_vector);
22882289

22892290
(void) rw_exit(&zd->zd_zilog_lock);
2291+
mutex_exit(&zd->zd_dirobj_lock);
22902292
}
22912293

22922294
/*
@@ -5743,6 +5745,7 @@ ztest_freeze(void)
57435745
*/
57445746
kernel_init(FREAD | FWRITE);
57455747
VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
5748+
ASSERT(spa_freeze_txg(spa) == UINT64_MAX);
57465749
VERIFY3U(0, ==, ztest_dataset_open(0));
57475750
ztest_dataset_close(0);
57485751

include/sys/dsl_pool.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ typedef struct dsl_pool {
9090

9191
/* No lock needed - sync context only */
9292
blkptr_t dp_meta_rootbp;
93-
list_t dp_synced_datasets;
9493
hrtime_t dp_read_overhead;
9594
uint64_t dp_throughput; /* bytes per millisec */
9695
uint64_t dp_write_limit;
@@ -104,13 +103,17 @@ typedef struct dsl_pool {
104103
kmutex_t dp_lock;
105104
uint64_t dp_space_towrite[TXG_SIZE];
106105
uint64_t dp_tempreserved[TXG_SIZE];
106+
uint64_t dp_mos_used_delta;
107+
uint64_t dp_mos_compressed_delta;
108+
uint64_t dp_mos_uncompressed_delta;
107109
uint64_t dp_txg_history_size;
108110
list_t dp_txg_history;
109111

110112

111113
/* Has its own locking */
112114
tx_state_t dp_tx;
113115
txg_list_t dp_dirty_datasets;
116+
txg_list_t dp_dirty_zilogs;
114117
txg_list_t dp_dirty_dirs;
115118
txg_list_t dp_sync_tasks;
116119

@@ -150,6 +153,8 @@ int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
150153
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
151154
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
152155
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
156+
void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
157+
int64_t used, int64_t comp, int64_t uncomp);
153158

154159
taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);
155160

include/sys/txg.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
2323
* Use is subject to license terms.
2424
*/
25+
/*
26+
* Copyright (c) 2012 by Delphix. All rights reserved.
27+
*/
2528

2629
#ifndef _SYS_TXG_H
2730
#define _SYS_TXG_H
@@ -121,7 +124,7 @@ extern void txg_wait_callbacks(struct dsl_pool *dp);
121124

122125
extern void txg_list_create(txg_list_t *tl, size_t offset);
123126
extern void txg_list_destroy(txg_list_t *tl);
124-
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
127+
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
125128
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
126129
extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
127130
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);

include/sys/zil.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23+
* Copyright (c) 2012 by Delphix. All rights reserved.
2324
*/
2425

2526
/* Portions Copyright 2010 Robert Milkowski */
@@ -454,6 +455,7 @@ extern void zil_replay(objset_t *os, void *arg,
454455
zil_replay_func_t *replay_func[TX_MAX_TYPE]);
455456
extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
456457
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
458+
extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
457459
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);
458460

459461
extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);

include/sys/zil_impl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23+
* Copyright (c) 2012 by Delphix. All rights reserved.
2324
*/
2425

2526
/* Portions Copyright 2010 Robert Milkowski */
@@ -131,6 +132,7 @@ struct zilog {
131132
zil_header_t zl_old_header; /* debugging aid */
132133
uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
133134
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
135+
txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
134136
};
135137

136138
typedef struct zil_bp_node {

module/zfs/dmu.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1953,15 +1953,15 @@ dmu_init(void)
19531953
dbuf_init();
19541954
zfetch_init();
19551955
dmu_tx_init();
1956-
arc_init();
19571956
l2arc_init();
1957+
arc_init();
19581958
}
19591959

19601960
void
19611961
dmu_fini(void)
19621962
{
1963-
l2arc_fini();
19641963
arc_fini();
1964+
l2arc_fini();
19651965
dmu_tx_fini();
19661966
zfetch_fini();
19671967
dbuf_fini();

module/zfs/dmu_send.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1609,13 +1609,6 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
16091609
dsl_dataset_t *ds = drc->drc_logical_ds;
16101610
int err, myerr;
16111611

1612-
/*
1613-
* XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
1614-
* expects it to have a ds_user_ptr (and zil), but clone_swap()
1615-
* can close it.
1616-
*/
1617-
txg_wait_synced(ds->ds_dir->dd_pool, 0);
1618-
16191612
if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
16201613
err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
16211614
drc->drc_force);

module/zfs/dsl_dataset.c

Lines changed: 47 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
105105
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
106106
ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
107107
if (ds == NULL) {
108-
/*
109-
* Account for the meta-objset space in its placeholder
110-
* dsl_dir.
111-
*/
112-
ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
113-
dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
114-
used, compressed, uncompressed, tx);
115-
dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
108+
dsl_pool_mos_diduse_space(tx->tx_pool,
109+
used, compressed, uncompressed);
116110
return;
117111
}
118112
dmu_buf_will_dirty(ds->ds_dbuf, tx);
@@ -150,15 +144,9 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
150144

151145
ASSERT(used > 0);
152146
if (ds == NULL) {
153-
/*
154-
* Account for the meta-objset space in its placeholder
155-
* dataset.
156-
*/
157147
dsl_free(tx->tx_pool, tx->tx_txg, bp);
158-
159-
dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
160-
-used, -compressed, -uncompressed, tx);
161-
dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
148+
dsl_pool_mos_diduse_space(tx->tx_pool,
149+
-used, -compressed, -uncompressed);
162150
return (used);
163151
}
164152
ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
@@ -1074,26 +1062,26 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
10741062
dummy_ds->ds_dir = dd;
10751063
dummy_ds->ds_object = ds->ds_object;
10761064

1077-
/*
1078-
* Check for errors and mark this ds as inconsistent, in
1079-
* case we crash while freeing the objects.
1080-
*/
1081-
err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
1082-
dsl_dataset_destroy_begin_sync, ds, NULL, 0);
1083-
if (err)
1084-
goto out_free;
1085-
1086-
err = dmu_objset_from_ds(ds, &os);
1087-
if (err)
1088-
goto out_free;
1089-
1090-
/*
1091-
* If async destruction is not enabled try to remove all objects
1092-
* while in the open context so that there is less work to do in
1093-
* the syncing context.
1094-
*/
10951065
if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
10961066
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
1067+
/*
1068+
* Check for errors and mark this ds as inconsistent, in
1069+
* case we crash while freeing the objects.
1070+
*/
1071+
err = dsl_sync_task_do(dd->dd_pool,
1072+
dsl_dataset_destroy_begin_check,
1073+
dsl_dataset_destroy_begin_sync, ds, NULL, 0);
1074+
if (err)
1075+
goto out_free;
1076+
1077+
err = dmu_objset_from_ds(ds, &os);
1078+
if (err)
1079+
goto out_free;
1080+
1081+
/*
1082+
* Remove all objects while in the open context so that
1083+
* there is less work to do in the syncing context.
1084+
*/
10971085
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
10981086
ds->ds_phys->ds_prev_snap_txg)) {
10991087
/*
@@ -1104,29 +1092,25 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
11041092
}
11051093
if (err != ESRCH)
11061094
goto out_free;
1107-
}
11081095

1109-
/*
1110-
* Only the ZIL knows how to free log blocks.
1111-
*/
1112-
zil_destroy(dmu_objset_zil(os), B_FALSE);
1113-
1114-
/*
1115-
* Sync out all in-flight IO.
1116-
*/
1117-
txg_wait_synced(dd->dd_pool, 0);
1096+
/*
1097+
* Sync out all in-flight IO.
1098+
*/
1099+
txg_wait_synced(dd->dd_pool, 0);
11181100

1119-
/*
1120-
* If we managed to free all the objects in open
1121-
* context, the user space accounting should be zero.
1122-
*/
1123-
if (ds->ds_phys->ds_bp.blk_fill == 0 &&
1124-
dmu_objset_userused_enabled(os)) {
1125-
ASSERTV(uint64_t count);
1126-
ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
1127-
count == 0);
1128-
ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
1129-
count == 0);
1101+
/*
1102+
* If we managed to free all the objects in open
1103+
* context, the user space accounting should be zero.
1104+
*/
1105+
if (ds->ds_phys->ds_bp.blk_fill == 0 &&
1106+
dmu_objset_userused_enabled(os)) {
1107+
ASSERTV(uint64_t count);
1108+
1109+
ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
1110+
&count) != 0 || count == 0);
1111+
ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
1112+
&count) != 0 || count == 0);
1113+
}
11301114
}
11311115

11321116
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
@@ -1878,6 +1862,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
18781862
} else {
18791863
zfeature_info_t *async_destroy =
18801864
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
1865+
objset_t *os;
18811866

18821867
/*
18831868
* There's no next snapshot, so this is a head dataset.
@@ -1889,6 +1874,8 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
18891874
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
18901875
ds->ds_phys->ds_deadlist_obj = 0;
18911876

1877+
VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
1878+
18921879
if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
18931880
err = old_synchronous_dataset_destroy(ds, tx);
18941881
} else {
@@ -1898,12 +1885,12 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
18981885
*/
18991886
uint64_t used, comp, uncomp;
19001887

1901-
ASSERT(err == 0 || err == EBUSY);
1888+
zil_destroy_sync(dmu_objset_zil(os), tx);
1889+
19021890
if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
19031891
spa_feature_incr(dp->dp_spa, async_destroy, tx);
1904-
dp->dp_bptree_obj = bptree_alloc(
1905-
dp->dp_meta_objset, tx);
1906-
VERIFY(zap_add(dp->dp_meta_objset,
1892+
dp->dp_bptree_obj = bptree_alloc(mos, tx);
1893+
VERIFY(zap_add(mos,
19071894
DMU_POOL_DIRECTORY_OBJECT,
19081895
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
19091896
&dp->dp_bptree_obj, tx) == 0);
@@ -1916,7 +1903,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
19161903
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
19171904
ds->ds_phys->ds_unique_bytes == used);
19181905

1919-
bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
1906+
bptree_add(mos, dp->dp_bptree_obj,
19201907
&ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
19211908
used, comp, uncomp, tx);
19221909
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
@@ -2203,7 +2190,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
22032190
dmu_buf_will_dirty(ds->ds_dbuf, tx);
22042191
ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
22052192

2206-
dsl_dir_dirty(ds->ds_dir, tx);
22072193
dmu_objset_sync(ds->ds_objset, zio, tx);
22082194
}
22092195

module/zfs/dsl_dir.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,6 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
189189
kmem_free(dd, sizeof (dsl_dir_t));
190190
dmu_buf_rele(dbuf, tag);
191191
return (err);
192-
193192
}
194193

195194
void
@@ -223,7 +222,7 @@ dsl_dir_name(dsl_dir_t *dd, char *buf)
223222
}
224223
}
225224

226-
/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
225+
/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
227226
int
228227
dsl_dir_namelen(dsl_dir_t *dd)
229228
{
@@ -592,8 +591,6 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
592591
{
593592
ASSERT(dmu_tx_is_syncing(tx));
594593

595-
dmu_buf_will_dirty(dd->dd_dbuf, tx);
596-
597594
mutex_enter(&dd->dd_lock);
598595
ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
599596
dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
@@ -950,8 +947,6 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
950947
ASSERT(dmu_tx_is_syncing(tx));
951948
ASSERT(type < DD_USED_NUM);
952949

953-
dsl_dir_dirty(dd, tx);
954-
955950
if (needlock)
956951
mutex_enter(&dd->dd_lock);
957952
accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
@@ -960,6 +955,7 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
960955
dd->dd_phys->dd_compressed_bytes >= -compressed);
961956
ASSERT(uncompressed >= 0 ||
962957
dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
958+
dmu_buf_will_dirty(dd->dd_dbuf, tx);
963959
dd->dd_phys->dd_used_bytes += used;
964960
dd->dd_phys->dd_uncompressed_bytes += uncompressed;
965961
dd->dd_phys->dd_compressed_bytes += compressed;
@@ -1003,13 +999,13 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
1003999
if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
10041000
return;
10051001

1006-
dsl_dir_dirty(dd, tx);
10071002
if (needlock)
10081003
mutex_enter(&dd->dd_lock);
10091004
ASSERT(delta > 0 ?
10101005
dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
10111006
dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
10121007
ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
1008+
dmu_buf_will_dirty(dd->dd_dbuf, tx);
10131009
dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
10141010
dd->dd_phys->dd_used_breakdown[newtype] += delta;
10151011
if (needlock)

0 commit comments

Comments
 (0)