Skip to content

Commit 090ff09

Browse files
Ricardo M. Correiabehlendorf
authored andcommitted
Fix commit callbacks
The upstream commit cb code had a few bugs: 1) The arguments of the list_move_tail() call in txg_dispatch_callbacks() were reversed by mistake. This caused the commit callbacks to not be called at all. 2) ztest had a bug in ztest_dmu_commit_callbacks() where "error" was not initialized correctly. This seems to have caused the test to always take the simulated error code path, which made ztest unable to detect whether commit cbs were being called for transactions that successfuly complete. 3) ztest had another bug in ztest_dmu_commit_callbacks() where the commit cb threshold was not being compared correctly. 4) The commit cb taskq was using 'max_ncpus * 2' as the maxalloc argument of taskq_create(), which could have caused unnecessary delays in the txg sync thread. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
1 parent a609808 commit 090ff09

File tree

2 files changed

+35
-18
lines changed

2 files changed

+35
-18
lines changed

cmd/ztest/ztest.c

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,22 @@ static boolean_t ztest_exiting;
341341

342342
/* Global commit callback list */
343343
static ztest_cb_list_t zcl;
344+
/* Commit cb delay */
345+
static uint64_t zc_min_txg_delay = UINT64_MAX;
346+
static int zc_cb_counter = 0;
347+
348+
/*
349+
* Minimum number of commit callbacks that need to be registered for us to check
350+
* whether the minimum txg delay is acceptable.
351+
*/
352+
#define ZTEST_COMMIT_CB_MIN_REG 100
353+
354+
/*
355+
* If a number of txgs equal to this threshold have been created after a commit
356+
* callback has been registered but not called, then we assume there is an
357+
* implementation bug.
358+
*/
359+
#define ZTEST_COMMIT_CB_THRESH (TXG_CONCURRENT_STATES + 1000)
344360

345361
extern uint64_t metaslab_gang_bang;
346362
extern uint64_t metaslab_df_alloc_threshold;
@@ -4092,18 +4108,20 @@ ztest_commit_callback(void *arg, int error)
40924108
return;
40934109
}
40944110

4095-
/* Was this callback added to the global callback list? */
4096-
if (!data->zcd_added)
4097-
goto out;
4098-
4111+
ASSERT(data->zcd_added);
40994112
ASSERT3U(data->zcd_txg, !=, 0);
41004113

4101-
/* Remove our callback from the list */
41024114
(void) mutex_lock(&zcl.zcl_callbacks_lock);
4115+
4116+
/* See if this cb was called more quickly */
4117+
if ((synced_txg - data->zcd_txg) < zc_min_txg_delay)
4118+
zc_min_txg_delay = synced_txg - data->zcd_txg;
4119+
4120+
/* Remove our callback from the list */
41034121
list_remove(&zcl.zcl_callbacks, data);
4122+
41044123
(void) mutex_unlock(&zcl.zcl_callbacks_lock);
41054124

4106-
out:
41074125
umem_free(data, sizeof (ztest_cb_data_t));
41084126
}
41094127

@@ -4121,13 +4139,6 @@ ztest_create_cb_data(objset_t *os, uint64_t txg)
41214139
return (cb_data);
41224140
}
41234141

4124-
/*
4125-
* If a number of txgs equal to this threshold have been created after a commit
4126-
* callback has been registered but not called, then we assume there is an
4127-
* implementation bug.
4128-
*/
4129-
#define ZTEST_COMMIT_CALLBACK_THRESH (TXG_CONCURRENT_STATES + 2)
4130-
41314142
/*
41324143
* Commit callback test.
41334144
*/
@@ -4139,7 +4150,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
41394150
dmu_tx_t *tx;
41404151
ztest_cb_data_t *cb_data[3], *tmp_cb;
41414152
uint64_t old_txg, txg;
4142-
int i, error;
4153+
int i, error = 0;
41434154

41444155
ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
41454156

@@ -4219,7 +4230,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
42194230
*/
42204231
tmp_cb = list_head(&zcl.zcl_callbacks);
42214232
if (tmp_cb != NULL &&
4222-
tmp_cb->zcd_txg > txg - ZTEST_COMMIT_CALLBACK_THRESH) {
4233+
tmp_cb->zcd_txg + ZTEST_COMMIT_CB_THRESH < txg) {
42234234
fatal(0, "Commit callback threshold exceeded, oldest txg: %"
42244235
PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg);
42254236
}
@@ -4250,6 +4261,8 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
42504261
tmp_cb = cb_data[i];
42514262
}
42524263

4264+
zc_cb_counter += 3;
4265+
42534266
(void) mutex_unlock(&zcl.zcl_callbacks_lock);
42544267

42554268
dmu_tx_commit(tx);
@@ -5256,6 +5269,10 @@ ztest_run(ztest_shared_t *zs)
52565269
for (uint64_t object = 1; object < 50; object++)
52575270
dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
52585271

5272+
/* Verify that at least one commit cb was called in a timely fashion */
5273+
if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG)
5274+
VERIFY3U(zc_min_txg_delay, ==, 0);
5275+
52595276
spa_close(spa, FTAG);
52605277

52615278
/*

module/zfs/txg.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -335,15 +335,15 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
335335
* Commit callback taskq hasn't been created yet.
336336
*/
337337
tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
338-
max_ncpus, minclsyspri, max_ncpus, max_ncpus * 2,
339-
TASKQ_PREPOPULATE);
338+
100, minclsyspri, max_ncpus, INT_MAX,
339+
TASKQ_THREADS_CPU_PCT | TASKQ_PREPOPULATE);
340340
}
341341

342342
cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
343343
list_create(cb_list, sizeof (dmu_tx_callback_t),
344344
offsetof(dmu_tx_callback_t, dcb_node));
345345

346-
list_move_tail(&tc->tc_callbacks[g], cb_list);
346+
list_move_tail(cb_list, &tc->tc_callbacks[g]);
347347

348348
(void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *)
349349
txg_do_callbacks, cb_list, TQ_SLEEP);

0 commit comments

Comments
 (0)