rcu/nocb: Protect NOCB state via local_lock() under PREEMPT_RT
Warning
=======

Running v5.13-rt1 on my arm64 Juno board triggers:

[    0.156302] =============================
[    0.160416] WARNING: suspicious RCU usage
[    0.164529] 5.13.0-rt1 #20 Not tainted
[    0.168300] -----------------------------
[    0.172409] kernel/rcu/tree_plugin.h:69 Unsafe read of RCU_NOCB offloaded state!
[    0.179920]
[    0.179920] other info that might help us debug this:
[    0.179920]
[    0.188037]
[    0.188037] rcu_scheduler_active = 1, debug_locks = 1
[    0.194677] 3 locks held by rcuc/0/11:
[    0.198448] #0: ffff00097ef10cf8 ((softirq_ctrl.lock).lock){+.+.}-{2:2}, at: __local_bh_disable_ip (./include/linux/rcupdate.h:662 kernel/softirq.c:171)
[    0.208709] #1: ffff80001205e5f0 (rcu_read_lock){....}-{1:2}, at: rt_spin_lock (kernel/locking/spinlock_rt.c:43 (discriminator 4))
[    0.217134] #2: ffff80001205e5f0 (rcu_read_lock){....}-{1:2}, at: __local_bh_disable_ip (kernel/softirq.c:169)
[    0.226428]
[    0.226428] stack backtrace:
[    0.230889] CPU: 0 PID: 11 Comm: rcuc/0 Not tainted 5.13.0-rt1 #20
[    0.237100] Hardware name: ARM Juno development board (r0) (DT)
[    0.243041] Call trace:
[    0.245497] dump_backtrace (arch/arm64/kernel/stacktrace.c:163)
[    0.249185] show_stack (arch/arm64/kernel/stacktrace.c:219)
[    0.252522] dump_stack (lib/dump_stack.c:122)
[    0.255947] lockdep_rcu_suspicious (kernel/locking/lockdep.c:6439)
[    0.260328] rcu_rdp_is_offloaded (kernel/rcu/tree_plugin.h:69 kernel/rcu/tree_plugin.h:58)
[    0.264537] rcu_core (kernel/rcu/tree.c:2332 kernel/rcu/tree.c:2398 kernel/rcu/tree.c:2777)
[    0.267786] rcu_cpu_kthread (./include/linux/bottom_half.h:32 kernel/rcu/tree.c:2876)
[    0.271644] smpboot_thread_fn (kernel/smpboot.c:165 (discriminator 3))
[    0.275767] kthread (kernel/kthread.c:321)
[    0.279013] ret_from_fork (arch/arm64/kernel/entry.S:1005)

Here, the RCU core kthread is accessing the local CPU's rdp. Before
doing so, rcu_cpu_kthread() invokes local_bh_disable().

Under !CONFIG_PREEMPT_RT (and rcutree.use_softirq=0), this ends up
incrementing the preempt_count, which satisfies the "local non-preemptible
read" of rcu_rdp_is_offloaded().

Under CONFIG_PREEMPT_RT however, this becomes

  local_lock(&softirq_ctrl.lock)

which, under the same config, is migrate_disable() + rt_spin_lock(). As
pointed out by Frederic, this is not sufficient to safely access an rdp's
offload state, as the RCU core kthread can be preempted by a kworker
executing rcu_nocb_rdp_offload() [1].
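
For illustration, a simplified sketch of that call chain (matching the held
locks in the splat above, not the exact kernel/softirq.c code):

  local_bh_disable()
    __local_bh_disable_ip()
      local_lock(&softirq_ctrl.lock);  /* RT: migrate_disable() + rt_spin_lock() */
      rcu_read_lock();

rt_spin_lock() leaves the task preemptible, so a kworker running
rcu_nocb_rdp_offload() can preempt the rcuc kthread and flip the rdp's
offload state in the middle of rcu_core().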

Introduce a local_lock to serialize an rdp's offload state while the rdp's
associated core kthread is executing rcu_core().
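
Abridged from the diff below, the resulting pairing is:

  /* Reader side: rcuc kthread, rcu_cpu_kthread() */
  rcu_nocb_local_lock(rdp);     /* local_lock(&rcu_data.nocb_local_lock) */
  local_bh_disable();
  rcu_core();                   /* rcu_rdp_is_offloaded() is now stable */
  local_bh_enable();
  rcu_nocb_local_unlock(rdp);

  /* Updater side: rcu_nocb_rdp_offload() / rcu_nocb_rdp_deoffload() */
  rcu_nocb_local_lock(rdp);     /* excludes the local rcuc kthread under RT */
  /* ... flip the rdp's offload state ... */
  rcu_nocb_local_unlock(rdp);   /* done from rdp_offload_toggle() */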

rcu_core() preemptibility considerations
========================================

As pointed out by Paul [2], keeping rcu_check_quiescent_state() preemptible
(which is the case under CONFIG_PREEMPT_RT) requires some consideration.

note_gp_changes() itself runs with irqs off, and enters
__note_gp_changes() with rnp->lock held (raw_spinlock), thus is safe vs
preemption.

rdp->core_needs_qs *could* change after being read by the RCU core
kthread if it then gets preempted. Consider, with
CONFIG_RCU_STRICT_GRACE_PERIOD:

  rcuc/x                                   task_foo

    rcu_check_quiescent_state()
    `\
      rdp->core_needs_qs == true
			     <PREEMPT>
					   rcu_read_unlock()
					   `\
					     rcu_preempt_deferred_qs_irqrestore()
					     `\
					       rcu_report_qs_rdp()
					       `\
						 rdp->core_needs_qs := false;

This would let rcuc/x's rcu_check_quiescent_state() proceed further down to
rcu_report_qs_rdp(), but task_foo's earlier rcu_report_qs_rdp() invocation
would have already cleared the rdp grpmask from the rnp mask, so rcuc/x's
invocation would simply bail.

Since rcu_report_qs_rdp() can be safely invoked, even if rdp->core_needs_qs
changed, it appears safe to keep rcu_check_quiescent_state() preemptible.
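
For completeness, that bail-out relies on rcu_report_qs_rdp() re-checking the
rnp mask under rnp->lock, roughly (abridged from kernel/rcu/tree.c, v5.13):

  raw_spin_lock_irqsave_rcu_node(rnp, flags);
  ...
  mask = rdp->grpmask;
  rdp->core_needs_qs = false;
  if ((rnp->qsmask & mask) == 0) {
          /* Bit already cleared by the earlier report, nothing to do. */
          raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
  } else {
          rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
  }

so the preempted rcuc invocation takes the early exit rather than reporting a
stale quiescent state.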

[1]: http://lore.kernel.org/r/20210727230814.GC283787@lothringen
[2]: http://lore.kernel.org/r/20210729010445.GO4397@paulmck-ThinkPad-P17-Gen-1

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/r/20210811201354.1976839-4-valentin.schneider@arm.com
Valentin Schneider authored and Sebastian Andrzej Siewior committed Sep 13, 2021
1 parent 078166f commit e29353f
Showing 4 changed files with 77 additions and 8 deletions.
kernel/rcu/tree.c (4 additions, 0 deletions)
@@ -80,6 +80,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
.dynticks = ATOMIC_INIT(1),
#ifdef CONFIG_RCU_NOCB_CPU
.cblist.flags = SEGCBLIST_SOFTIRQ_ONLY,
.nocb_local_lock = INIT_LOCAL_LOCK(nocb_local_lock),
#endif
};
static struct rcu_state rcu_state = {
@@ -2811,10 +2812,12 @@ static void rcu_cpu_kthread(unsigned int cpu)
{
unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
int spincnt;

trace_rcu_utilization(TPS("Start CPU kthread@rcu_run"));
for (spincnt = 0; spincnt < 10; spincnt++) {
rcu_nocb_local_lock(rdp);
local_bh_disable();
*statusp = RCU_KTHREAD_RUNNING;
local_irq_disable();
@@ -2824,6 +2827,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
if (work)
rcu_core();
local_bh_enable();
rcu_nocb_local_unlock(rdp);
if (*workp == 0) {
trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
*statusp = RCU_KTHREAD_WAITING;
kernel/rcu/tree.h (4 additions, 0 deletions)
@@ -210,6 +210,8 @@ struct rcu_data {
struct timer_list nocb_timer; /* Enforce finite deferral. */
unsigned long nocb_gp_adv_time; /* Last call_rcu() CB adv (jiffies). */

local_lock_t nocb_local_lock;

/* The following fields are used by call_rcu, hence own cacheline. */
raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp;
struct rcu_cblist nocb_bypass; /* Lock-contention-bypass CB list. */
@@ -445,6 +447,8 @@ static void rcu_nocb_unlock(struct rcu_data *rdp);
static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
unsigned long flags);
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp);
static void rcu_nocb_local_lock(struct rcu_data *rdp);
static void rcu_nocb_local_unlock(struct rcu_data *rdp);
#ifdef CONFIG_RCU_NOCB_CPU
static void __init rcu_organize_nocb_kthreads(void);
#define rcu_nocb_lock_irqsave(rdp, flags) \
kernel/rcu/tree_nocb.h (39 additions, 0 deletions)
@@ -21,6 +21,11 @@ static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
return lockdep_is_held(&rdp->nocb_lock);
}

static inline int rcu_lockdep_is_held_nocb_local(struct rcu_data *rdp)
{
return lockdep_is_held(&rdp->nocb_local_lock);
}

static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
/* Race on early boot between thread creation and assignment */
@@ -181,6 +186,22 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
}
}

/*
* The invocation of rcu_core() within the RCU core kthreads remains preemptible
* under PREEMPT_RT, thus the offload state of a CPU could change while
* said kthreads are preempted. Prevent this from happening by protecting the
* offload state with a local_lock().
*/
static void rcu_nocb_local_lock(struct rcu_data *rdp)
{
local_lock(&rcu_data.nocb_local_lock);
}

static void rcu_nocb_local_unlock(struct rcu_data *rdp)
{
local_unlock(&rcu_data.nocb_local_lock);
}

/* Lockdep check that ->cblist may be safely accessed. */
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
@@ -948,6 +969,7 @@ static int rdp_offload_toggle(struct rcu_data *rdp,
if (rdp->nocb_cb_sleep)
rdp->nocb_cb_sleep = false;
rcu_nocb_unlock_irqrestore(rdp, flags);
rcu_nocb_local_unlock(rdp);

/*
* Ignore former value of nocb_cb_sleep and force wake up as it could
@@ -979,6 +1001,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)

pr_info("De-offloading %d\n", rdp->cpu);

rcu_nocb_local_lock(rdp);
rcu_nocb_lock_irqsave(rdp, flags);
/*
* Flush once and for all now. This suffices because we are
@@ -1061,6 +1084,7 @@ static long rcu_nocb_rdp_offload(void *arg)
* Can't use rcu_nocb_lock_irqsave() while we are in
* SEGCBLIST_SOFTIRQ_ONLY mode.
*/
rcu_nocb_local_lock(rdp);
raw_spin_lock_irqsave(&rdp->nocb_lock, flags);

/*
@@ -1408,6 +1432,11 @@ static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp)
return 0;
}

static inline int rcu_lockdep_is_held_nocb_local(struct rcu_data *rdp)
{
return 0;
}

static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
return false;
@@ -1430,6 +1459,16 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
local_irq_restore(flags);
}

/* No ->nocb_local_lock to acquire. */
static void rcu_nocb_local_lock(struct rcu_data *rdp)
{
}

/* No ->nocb_local_lock to release. */
static void rcu_nocb_local_unlock(struct rcu_data *rdp)
{
}

/* Lockdep check that ->cblist may be safely accessed. */
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
kernel/rcu/tree_plugin.h (30 additions, 8 deletions)
@@ -13,23 +13,45 @@

#include "../locking/rtmutex_common.h"

/*
* Is a local read of the rdp's offloaded state safe and stable?
* See rcu_nocb_local_lock() & family.
*/
static inline bool rcu_local_offload_access_safe(struct rcu_data *rdp)
{
if (!preemptible())
return true;

if (!is_migratable()) {
if (!IS_ENABLED(CONFIG_RCU_NOCB))
return true;

return rcu_lockdep_is_held_nocb_local(rdp);
}

return false;
}

static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
/*
* In order to read the offloaded state of an rdp is a safe
* and stable way and prevent from its value to be changed
* under us, we must either hold the barrier mutex, the cpu
* hotplug lock (read or write) or the nocb lock. Local
* non-preemptible reads are also safe. NOCB kthreads and
* timers have their own means of synchronization against the
* offloaded state updaters.
* In order to read the offloaded state of an rdp is a safe and stable
* way and prevent from its value to be changed under us, we must
* either...
*/
RCU_LOCKDEP_WARN(
// ...hold the barrier mutex...
!(lockdep_is_held(&rcu_state.barrier_mutex) ||
// ... the cpu hotplug lock (read or write)...
(IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
// ... or the NOCB lock.
rcu_lockdep_is_held_nocb(rdp) ||
// Local reads still require the local state to remain stable
// (preemption disabled / local lock held)
(rdp == this_cpu_ptr(&rcu_data) &&
!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) ||
rcu_local_offload_access_safe(rdp)) ||
// NOCB kthreads and timers have their own means of
// synchronization against the offloaded state updaters.
rcu_current_is_nocb_kthread(rdp)),
"Unsafe read of RCU_NOCB offloaded state"
);
