Permalink
Browse files

Fix random ztest_deadman_thread failures

The zloop test has been failing in buildbot for the last few weeks
with various failures in ztest_deadman_thread(). This is due to the
fact that this thread is not stopped when performing pool import /
export tests as it should be. This patch simply corrects this.

TEST_ZTEST_TIMEOUT=3600

Signed-off-by: Tom Caputi <tcaputi@datto.com>
  • Loading branch information...
tcaputi committed Oct 10, 2018
1 parent 5b3bfd8 commit acb449403de126c6badfe5200c64a81f747bed48
Showing with 19 additions and 7 deletions.
  1. +19 −7 cmd/ztest/ztest.c
View
@@ -6652,13 +6652,20 @@ ztest_deadman_thread(void *arg)
{
ztest_shared_t *zs = arg;
spa_t *spa = ztest_spa;
hrtime_t delta, overdue, total = 0;
hrtime_t delay, overdue, last_run = gethrtime(), total = 0;
for (;;) {
delta = zs->zs_thread_stop - zs->zs_thread_start +
MSEC2NSEC(zfs_deadman_synctime_ms);
delay = zs->zs_thread_stop - zs->zs_thread_start +
MSEC2NSEC(zfs_deadman_synctime_ms);
(void) poll(NULL, 0, (int)NSEC2MSEC(delta));
while (!ztest_exiting) {
/*
* Wait for the delay timer while checking occasionally
* if we should stop.
*/
if (gethrtime() - last_run < delay) {
(void) poll(NULL, 0, 1000);
continue;
}
/*
* If the pool is suspended then fail immediately. Otherwise,
@@ -6687,7 +6694,11 @@ ztest_deadman_thread(void *arg)
(void) printf("ztest has been running for %lld seconds\n",
total);
last_run = gethrtime();
}
thread_exit();
}
static void
@@ -6881,7 +6892,7 @@ ztest_run(ztest_shared_t *zs)
{
spa_t *spa;
objset_t *os;
kthread_t *resume_thread;
kthread_t *resume_thread, *deadman_thread;
kthread_t **run_threads;
uint64_t object;
int error;
@@ -6939,7 +6950,7 @@ ztest_run(ztest_shared_t *zs)
/*
* Create a deadman thread and set to panic if we hang.
*/
(void) thread_create(NULL, 0, ztest_deadman_thread,
deadman_thread = thread_create(NULL, 0, ztest_deadman_thread,
zs, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri);
spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC;
@@ -7009,6 +7020,7 @@ ztest_run(ztest_shared_t *zs)
/* Kill the resume thread */
ztest_exiting = B_TRUE;
VERIFY0(thread_join(resume_thread));
VERIFY0(thread_join(deadman_thread));
ztest_resume(spa);
/*

0 comments on commit acb4494

Please sign in to comment.