Skip to content

Commit

Permalink
md: fix stopping sync thread
Browse files Browse the repository at this point in the history
Currently sync thread is stopped from multiple contex:
 - idle_sync_thread
 - frozen_sync_thread
 - __md_stop_writes
 - md_set_readonly
 - do_md_stop

And there are some problems:
1) sync_work is flushed while reconfig_mutex is grabbed, this can
   deadlock because the work function will grab reconfig_mutex as well.
2) md_reap_sync_thread() can't be called directly while md_do_sync() is
   not finished yet, for example, commit 130443d ("md: refactor
   idle/frozen_sync_thread() to fix deadlock").
3) If MD_RECOVERY_RUNNING is not set, there is no need to stop
   sync_thread at all because sync_thread must not be registered.

Factor out a helper stop_sync_thread(), so that above contex will behave
the same. Fix 1) by flushing sync_work after reconfig_mutex is released,
before waiting for sync_thread to be done; Fix 2) bt letting daemon thread
to unregister sync_thread; Fix 3) by always checking MD_RECOVERY_RUNNING
first.

Fixes: db5e653 ("md: delay choosing sync action to md_start_sync()")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20231205094215.1824240-4-yukuai1@huaweicloud.com
  • Loading branch information
Yu Kuai authored and liu-song-6 committed Dec 6, 2023
1 parent c9f7cb5 commit f52f5c7
Showing 1 changed file with 37 additions and 53 deletions.
90 changes: 37 additions & 53 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -4840,59 +4840,72 @@ action_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", type);
}

static void stop_sync_thread(struct mddev *mddev)
/**
* stop_sync_thread() - wait for sync_thread to stop if it's running.
* @mddev: the array.
* @locked: if set, reconfig_mutex will still be held after this function
* return; if not set, reconfig_mutex will be released after this
* function return.
* @check_seq: if set, only wait for curent running sync_thread to stop, noted
* that new sync_thread can still start.
*/
static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
{
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return;
int sync_seq;

if (mddev_lock(mddev))
return;
if (check_seq)
sync_seq = atomic_read(&mddev->sync_seq);

/*
* Check again in case MD_RECOVERY_RUNNING is cleared before lock is
* held.
*/
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
mddev_unlock(mddev);
if (!locked)
mddev_unlock(mddev);
return;
}

if (work_pending(&mddev->sync_work))
flush_workqueue(md_misc_wq);
mddev_unlock(mddev);

set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/*
* Thread might be blocked waiting for metadata update which will now
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);
if (work_pending(&mddev->sync_work))
flush_work(&mddev->sync_work);

mddev_unlock(mddev);
wait_event(resync_wait,
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
(check_seq && sync_seq != atomic_read(&mddev->sync_seq)));

if (locked)
mddev_lock_nointr(mddev);
}

static void idle_sync_thread(struct mddev *mddev)
{
int sync_seq = atomic_read(&mddev->sync_seq);

mutex_lock(&mddev->sync_mutex);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
stop_sync_thread(mddev);

wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) ||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
if (mddev_lock(mddev)) {
mutex_unlock(&mddev->sync_mutex);
return;
}

stop_sync_thread(mddev, false, true);
mutex_unlock(&mddev->sync_mutex);
}

static void frozen_sync_thread(struct mddev *mddev)
{
mutex_lock(&mddev->sync_mutex);
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
stop_sync_thread(mddev);

wait_event(resync_wait, mddev->sync_thread == NULL &&
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
if (mddev_lock(mddev)) {
mutex_unlock(&mddev->sync_mutex);
return;
}

stop_sync_thread(mddev, false, false);
mutex_unlock(&mddev->sync_mutex);
}

Expand Down Expand Up @@ -6264,14 +6277,7 @@ static void md_clean(struct mddev *mddev)

static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (work_pending(&mddev->sync_work))
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
}

stop_sync_thread(mddev, true, false);
del_timer_sync(&mddev->safemode_timer);

if (mddev->pers && mddev->pers->quiesce) {
Expand Down Expand Up @@ -6363,18 +6369,8 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);

/*
* Thread might be blocked waiting for metadata update which will now
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);

mddev_unlock(mddev);
wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery));
stop_sync_thread(mddev, false, false);
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
mddev_lock_nointr(mddev);
Expand Down Expand Up @@ -6428,20 +6424,8 @@ static int do_md_stop(struct mddev *mddev, int mode,
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
set_bit(MD_RECOVERY_INTR, &mddev->recovery);

/*
* Thread might be blocked waiting for metadata update which will now
* never happen
*/
md_wakeup_thread_directly(mddev->sync_thread);

mddev_unlock(mddev);
wait_event(resync_wait, (mddev->sync_thread == NULL &&
!test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery)));
mddev_lock_nointr(mddev);
stop_sync_thread(mddev, true, false);

mutex_lock(&mddev->open_mutex);
if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
Expand Down

0 comments on commit f52f5c7

Please sign in to comment.