ptrace: fix ptrace vs tasklist_lock race
As explained by Alexander Fyodorov <halcy@yandex.ru>:

|read_lock(&tasklist_lock) in ptrace_stop() is converted to a mutex on an RT
|kernel, and it can remove __TASK_TRACED from task->state (by moving it to
|task->saved_state). If the parent does wait() on the child followed by a
|sys_ptrace call, the following race can happen:
|
|- child sets __TASK_TRACED in ptrace_stop()
|- parent does wait() which eventually calls wait_task_stopped() and returns
|  child's pid
|- child blocks on read_lock(&tasklist_lock) in ptrace_stop() and moves
|  __TASK_TRACED flag to saved_state
|- parent calls sys_ptrace, which calls ptrace_check_attach() and wait_task_inactive()
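
For illustration, the sleeping-lock slow path on RT conceptually does the
following when ptrace_stop() blocks on tasklist_lock. This is a simplified
sketch of the state juggling, modelled on the
current_save_and_set_rtlock_wait_state() helper, not the literal rtmutex
code:

	raw_spin_lock(&current->pi_lock);
	/* __TASK_TRACED set by ptrace_stop() is parked here ... */
	current->saved_state = current->__state;
	/* ... while the task sleeps on the lock in a dedicated state. */
	WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);
	raw_spin_unlock(&current->pi_lock);

Any observer that only looks at ->__state therefore no longer sees
__TASK_TRACED while the tracee waits for the lock.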

The patch is based on his initial patch: an additional check is added
for the case where __TASK_TRACED has been moved to ->saved_state. The
pi_lock is held across the check so that the task cannot move the flag
between ->state and ->saved_state while the caller inspects both fields.
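
The window can be hit with an entirely ordinary tracer. A hypothetical
minimal reproducer (illustrative only, not part of this patch) looks
like this:

	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <signal.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t child = fork();

		if (child == 0) {
			/* Tracee: put itself under the parent's control and stop. */
			ptrace(PTRACE_TRACEME, 0, NULL, NULL);
			raise(SIGSTOP);
			_exit(0);
		}

		/* wait_task_stopped() reports the child as stopped ... */
		waitpid(child, NULL, 0);

		/*
		 * ... but on RT the child may still be blocked on
		 * tasklist_lock in ptrace_stop() with __TASK_TRACED parked
		 * in ->saved_state, so ptrace_check_attach() races with
		 * wait_task_inactive() here.
		 */
		ptrace(PTRACE_CONT, child, NULL, NULL);
		waitpid(child, NULL, 0);
		return 0;
	}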

[ Fix for ptrace_unfreeze_traced() by Oleg Nesterov ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Sebastian Andrzej Siewior committed Sep 13, 2021
1 parent 6a6f78b commit 489b92d
Showing 3 changed files with 108 additions and 13 deletions.
79 changes: 75 additions & 4 deletions include/linux/sched.h
@@ -118,12 +118,8 @@ struct task_group;
 
 #define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)
 
-#define task_is_traced(task)		((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
 #define task_is_stopped(task)		((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
 
-#define task_is_stopped_or_traced(task)	((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
-
 /*
  * Special states are those that do not use the normal wait-loop pattern. See
  * the comment with set_special_state().
@@ -2014,6 +2010,81 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
 	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+	return p->saved_state == match_state;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+	bool traced = false;
+
+	/* in case the task is sleeping on tasklist_lock */
+	raw_spin_lock_irq(&task->pi_lock);
+	if (READ_ONCE(task->__state) & __TASK_TRACED)
+		traced = true;
+	else if (task->saved_state & __TASK_TRACED)
+		traced = true;
+	raw_spin_unlock_irq(&task->pi_lock);
+	return traced;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+	bool traced_stopped = false;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+	if (READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED))
+		traced_stopped = true;
+	else if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
+		traced_stopped = true;
+
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+	return traced_stopped;
+}
+
+#else
+
+static inline bool task_match_saved_state(struct task_struct *p, long match_state)
+{
+	return false;
+}
+
+static inline bool task_is_traced(struct task_struct *task)
+{
+	return READ_ONCE(task->__state) & __TASK_TRACED;
+}
+
+static inline bool task_is_stopped_or_traced(struct task_struct *task)
+{
+	return READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED);
+}
+#endif
+
+static inline bool task_match_state_or_saved(struct task_struct *p,
+					     long match_state)
+{
+	if (READ_ONCE(p->__state) == match_state)
+		return true;
+
+	return task_match_saved_state(p, match_state);
+}
+
+static inline bool task_match_state_lock(struct task_struct *p,
+					 long match_state)
+{
+	bool match;
+
+	raw_spin_lock_irq(&p->pi_lock);
+	match = task_match_state_or_saved(p, match_state);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return match;
+}
+
 /*
  * cond_resched() and cond_resched_lock(): latency reduction via
  * explicit rescheduling in places that are safe. The return
38 changes: 31 additions & 7 deletions kernel/ptrace.c
@@ -197,7 +197,18 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
 	    !__fatal_signal_pending(task)) {
+#ifdef CONFIG_PREEMPT_RT
+		unsigned long flags;
+
+		raw_spin_lock_irqsave(&task->pi_lock, flags);
+		if (READ_ONCE(task->__state) & __TASK_TRACED)
+			WRITE_ONCE(task->__state, __TASK_TRACED);
+		else
+			task->saved_state = __TASK_TRACED;
+		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+#else
 		WRITE_ONCE(task->__state, __TASK_TRACED);
+#endif
 		ret = true;
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@@ -207,7 +218,11 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 
 static void ptrace_unfreeze_traced(struct task_struct *task)
 {
-	if (READ_ONCE(task->__state) != __TASK_TRACED)
+	unsigned long flags;
+	bool frozen = true;
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
+	    READ_ONCE(task->__state) != __TASK_TRACED)
 		return;
 
 	WARN_ON(!task->ptrace || task->parent != current);
@@ -217,12 +232,21 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
 	 * Recheck state under the lock to close this race.
 	 */
 	spin_lock_irq(&task->sighand->siglock);
-	if (READ_ONCE(task->__state) == __TASK_TRACED) {
-		if (__fatal_signal_pending(task))
-			wake_up_state(task, __TASK_TRACED);
-		else
-			WRITE_ONCE(task->__state, TASK_TRACED);
-	}
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	if (READ_ONCE(task->__state) == __TASK_TRACED)
+		WRITE_ONCE(task->__state, TASK_TRACED);
+#ifdef CONFIG_PREEMPT_RT
+	else if (task->saved_state == __TASK_TRACED)
+		task->saved_state = TASK_TRACED;
+#endif
+	else
+		frozen = false;
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	if (frozen && __fatal_signal_pending(task))
+		wake_up_state(task, __TASK_TRACED);
+
 	spin_unlock_irq(&task->sighand->siglock);
 }

4 changes: 2 additions & 2 deletions kernel/sched/core.c
@@ -3207,7 +3207,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 	 * is actually now running somewhere else!
 	 */
 	while (task_running(rq, p)) {
-		if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
+		if (match_state && !task_match_state_lock(p, match_state))
 			return 0;
 		cpu_relax();
 	}
@@ -3222,7 +3222,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
 	running = task_running(rq, p);
 	queued = task_on_rq_queued(p);
 	ncsw = 0;
-	if (!match_state || READ_ONCE(p->__state) == match_state)
+	if (!match_state || task_match_state_or_saved(p, match_state))
 		ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 	task_rq_unlock(rq, p, &rf);
 