From 6472a6d0c79826256bfa3a6b45710dd499a984bf Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Mon, 29 Aug 2022 01:03:02 +0800 Subject: [PATCH] sched/rt: Fix bad task migration for rt tasks [ Upstream commit feffe5bb274dd3442080ef0e4053746091878799 ] Commit 95158a89dd50 ("sched,rt: Use the full cpumask for balancing") allows find_lock_lowest_rq() to pick a task with migration disabled. The purpose of the commit is to push the current running task on the CPU that has the migrate_disable() task away. However, there is a race which allows a migrate_disable() task to be migrated. Consider: CPU0 CPU1 push_rt_task check is_migration_disabled(next_task) task not running and migration_disabled == 0 find_lock_lowest_rq(next_task, rq); _double_lock_balance(this_rq, busiest); raw_spin_rq_unlock(this_rq); double_rq_lock(this_rq, busiest); <> task become running migrate_disable(); deactivate_task(rq, next_task, 0); set_task_cpu(next_task, lowest_rq->cpu); WARN_ON_ONCE(is_migration_disabled(p)); Fixes: 95158a89dd50 ("sched,rt: Use the full cpumask for balancing") Signed-off-by: Schspa Shi Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Reviewed-by: Dietmar Eggemann Reviewed-by: Valentin Schneider Tested-by: Dwaine Gonyier Signed-off-by: Sasha Levin --- kernel/sched/deadline.c | 1 + kernel/sched/rt.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9ae8f41e3372f..f7d381b6c3133 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2246,6 +2246,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) || task_on_cpu(rq, task) || !dl_task(task) || + is_migration_disabled(task) || !task_on_rq_queued(task))) { double_unlock_balance(rq, later_rq); later_rq = NULL; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 0a11f44adee57..4f5796dd26a56 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2000,11 +2000,15 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) * the mean time, task could have * migrated already or had its affinity changed. * Also make sure that it wasn't scheduled on its rq. + * It is possible the task was scheduled, set + * "migrate_disabled" and then got preempted, so we must + * check the task migration disable flag here too. */ if (unlikely(task_rq(task) != rq || !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) || task_on_cpu(rq, task) || !rt_task(task) || + is_migration_disabled(task) || !task_on_rq_queued(task))) { double_unlock_balance(rq, lowest_rq);