author     Boqun Feng <boqun.feng@gmail.com>    2026-01-11 20:15:07 +0800
committer  Boqun Feng <boqun.feng@gmail.com>    2026-01-11 20:15:07 +0800
commit     fe1d4828846f025e42ca5e2112c01084c8aa8ab2 (patch)
tree       d87dc54a8d83e4d837d584376617a5ff422ee015 /kernel
parent     acb0b2f5d6472b26db1d68f6d7b982d30b8bd8fe (diff)
parent     bc3705e20988778791a4a5e9e2700fbc22cc942d (diff)
Merge branch 'rcu-misc.20260111a'
* rcu-misc.20260111a:
  rcu: Reduce synchronize_rcu() latency by reporting GP kthread's CPU QS early
  srcu: Use suitable gfp_flags for the init_srcu_struct_nodes()
  rcu: Fix rcu_read_unlock() deadloop due to softirq
  rcutorture: Correctly compute probability to invoke ->exp_current()
  rcu: Make expedited RCU CPU stall warnings detect stall-end races
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcu/rcutorture.c    4
-rw-r--r--  kernel/rcu/srcutree.c      2
-rw-r--r--  kernel/rcu/tree.c         12
-rw-r--r--  kernel/rcu/tree.h          2
-rw-r--r--  kernel/rcu/tree_exp.h      7
-rw-r--r--  kernel/rcu/tree_plugin.h  15
6 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 63053031ade2..47ce7f49b52c 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1749,7 +1749,7 @@ rcu_torture_writer(void *arg)
ulo[i] = cur_ops->get_comp_state();
gp_snap = cur_ops->start_gp_poll();
rcu_torture_writer_state = RTWS_POLL_WAIT;
- if (cur_ops->exp_current && !torture_random(&rand) % 0xff)
+ if (cur_ops->exp_current && !(torture_random(&rand) & 0xff))
cur_ops->exp_current();
while (!cur_ops->poll_gp_state(gp_snap)) {
gp_snap1 = cur_ops->get_gp_state();
@@ -1771,7 +1771,7 @@ rcu_torture_writer(void *arg)
cur_ops->get_comp_state_full(&rgo[i]);
cur_ops->start_gp_poll_full(&gp_snap_full);
rcu_torture_writer_state = RTWS_POLL_WAIT_FULL;
- if (cur_ops->exp_current && !torture_random(&rand) % 0xff)
+ if (cur_ops->exp_current && !(torture_random(&rand) & 0xff))
cur_ops->exp_current();
while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
cur_ops->get_gp_state_full(&gp_snap1_full);
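
[Note] The two rcutorture hunks above fix an operator-precedence bug: '!' binds more tightly than '%', so the old test evaluated as (!torture_random(&rand)) % 0xff and fired only when torture_random() returned exactly 0, not roughly once in 256 calls as intended. Below is a standalone sketch (not kernel code; rand() stands in for torture_random(), counts and names are made up for illustration) showing how often each form fires:

/*
 * Standalone sketch of the precedence fix above. rand() stands in for
 * torture_random(); the only point is how often each condition is true.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	long old_hits = 0, new_hits = 0, n = 1000000;

	for (long i = 0; i < n; i++) {
		unsigned long r = (unsigned long)rand();

		/* Old form: '!' binds before '%', so this is (!r) % 0xff,
		 * which is non-zero only when r == 0. */
		if (!r % 0xff)
			old_hits++;

		/* New form: true whenever the low byte is zero,
		 * i.e. roughly once every 256 calls. */
		if (!(r & 0xff))
			new_hits++;
	}
	printf("old condition fired %ld/%ld times, new condition %ld/%ld\n",
	       old_hits, n, new_hits, n);
	return 0;
}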
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index ea3f128de06f..c469c708fdd6 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -262,7 +262,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static)
ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL;
ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
- if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC))
+ if (!init_srcu_struct_nodes(ssp, is_static ? GFP_ATOMIC : GFP_KERNEL))
goto err_free_sda;
WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 293bbd9ac3f4..2c1c9759e278 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -160,6 +160,7 @@ static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
unsigned long gps, unsigned long flags);
static void invoke_rcu_core(void);
static void rcu_report_exp_rdp(struct rcu_data *rdp);
+static void rcu_report_qs_rdp(struct rcu_data *rdp);
static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
static bool rcu_rdp_cpu_online(struct rcu_data *rdp);
@@ -1983,6 +1984,17 @@ static noinline_for_stack bool rcu_gp_init(void)
if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
+ /*
+ * Immediately report QS for the GP kthread's CPU. The GP kthread
+ * cannot be in an RCU read-side critical section while running
+ * the FQS scan. This eliminates the need for a second FQS wait
+ * when all CPUs are idle.
+ */
+ preempt_disable();
+ rcu_qs();
+ rcu_report_qs_rdp(this_cpu_ptr(&rcu_data));
+ preempt_enable();
+
return true;
}
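
[Note] The added comment in rcu_gp_init() explains why the grace-period kthread may report its own CPU's quiescent state up front: while it is running this code it cannot be inside an RCU read-side critical section, so there is no need for a later FQS scan to discover that QS. A simplified userspace analogue (an assumption, not the kernel algorithm; NCPUS, qs_needed and report_qs() are made-up names) of clearing one's own bit before chasing the rest:

/*
 * Simplified userspace analogue: a coordinator waits for every "CPU" bit to
 * clear. Since the coordinator cannot be in a read-side critical section
 * while running this code, it clears its own bit immediately rather than
 * waiting for a later scan to notice it.
 */
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_uint qs_needed;	/* bit set => that CPU still owes a QS */

static void report_qs(int cpu)
{
	atomic_fetch_and(&qs_needed, ~(1u << cpu));
}

int main(void)
{
	int self = 0;	/* the CPU the coordinator happens to run on */

	atomic_store(&qs_needed, (1u << NCPUS) - 1);

	/* Report the coordinator's own QS right away ... */
	report_qs(self);

	/* ... so the later scan only has to wait on the remaining CPUs. */
	printf("still waiting on mask %#x\n", atomic_load(&qs_needed));
	return 0;
}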
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8bbe7960cda..2265b9c2906e 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -203,7 +203,7 @@ struct rcu_data {
/* during and after the last grace */
/* period it is aware of. */
struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */
- int defer_qs_iw_pending; /* Scheduler attention pending? */
+ int defer_qs_pending; /* irqwork or softirq pending? */
struct work_struct strict_work; /* Schedule readers for strict GPs. */
/* 2) batch handling */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 96c49c56fc14..82cada459e5d 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -589,7 +589,12 @@ static void synchronize_rcu_expedited_stall(unsigned long jiffies_start, unsigne
pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
j - jiffies_start, rcu_state.expedited_sequence, data_race(rnp_root->expmask),
".T"[!!data_race(rnp_root->exp_tasks)]);
- if (ndetected) {
+ if (!ndetected) {
+ // This is invoked from the grace-period worker, so
+ // a new grace period cannot have started. And if this
+ // worker were stalled, we would not get here. ;-)
+ pr_err("INFO: Expedited stall ended before state dump start\n");
+ } else {
pr_err("blocking rcu_node structures (internal RCU debug):");
rcu_for_each_node_breadth_first(rnp) {
if (rnp == rnp_root)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index dbe2d02be824..95ad967adcf3 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -487,8 +487,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
union rcu_special special;
rdp = this_cpu_ptr(&rcu_data);
- if (rdp->defer_qs_iw_pending == DEFER_QS_PENDING)
- rdp->defer_qs_iw_pending = DEFER_QS_IDLE;
+ if (rdp->defer_qs_pending == DEFER_QS_PENDING)
+ rdp->defer_qs_pending = DEFER_QS_IDLE;
/*
* If RCU core is waiting for this CPU to exit its critical section,
@@ -645,7 +645,7 @@ static void rcu_preempt_deferred_qs_handler(struct irq_work *iwp)
* 5. Deferred QS reporting does not happen.
*/
if (rcu_preempt_depth() > 0)
- WRITE_ONCE(rdp->defer_qs_iw_pending, DEFER_QS_IDLE);
+ WRITE_ONCE(rdp->defer_qs_pending, DEFER_QS_IDLE);
}
/*
@@ -747,7 +747,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
// Using softirq, safe to awaken, and either the
// wakeup is free or there is either an expedited
// GP in flight or a potential need to deboost.
- raise_softirq_irqoff(RCU_SOFTIRQ);
+ if (rdp->defer_qs_pending != DEFER_QS_PENDING) {
+ rdp->defer_qs_pending = DEFER_QS_PENDING;
+ raise_softirq_irqoff(RCU_SOFTIRQ);
+ }
} else {
// Enabling BH or preempt does reschedule, so...
// Also if no expediting and no possible deboosting,
@@ -755,11 +758,11 @@ static void rcu_read_unlock_special(struct task_struct *t)
// tick enabled.
set_need_resched_current();
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
- needs_exp && rdp->defer_qs_iw_pending != DEFER_QS_PENDING &&
+ needs_exp && rdp->defer_qs_pending != DEFER_QS_PENDING &&
cpu_online(rdp->cpu)) {
// Get scheduler to re-evaluate and call hooks.
// If !IRQ_WORK, FQS scan will eventually IPI.
- rdp->defer_qs_iw_pending = DEFER_QS_PENDING;
+ rdp->defer_qs_pending = DEFER_QS_PENDING;
irq_work_queue_on(&rdp->defer_qs_iw, rdp->cpu);
}
}
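
[Note] The tree_plugin.h hunks make the softirq path honour the same pending flag the irq_work path already used, so rcu_read_unlock_special() cannot keep re-raising RCU_SOFTIRQ before the earlier request has been handled (the deadloop named in the merge log). Below is a standalone sketch (not kernel code) of that guard pattern; raise_work(), handle_work() and request_deferred_qs() are made-up stand-ins for raise_softirq_irqoff()/irq_work_queue_on() and the deferred-QS handler:

/*
 * Standalone sketch of the "set the pending flag before requesting deferred
 * work" pattern used above. raise_work() stands in for the softirq/irq_work
 * request; handle_work() stands in for the handler that re-arms the flag.
 */
#include <stdio.h>

enum { DEFER_QS_IDLE, DEFER_QS_PENDING };

static int defer_qs_pending = DEFER_QS_IDLE;
static int raised;	/* how many times work was actually requested */

static void raise_work(void)
{
	raised++;
}

static void request_deferred_qs(void)
{
	/* Only request the work once until the handler has run. */
	if (defer_qs_pending != DEFER_QS_PENDING) {
		defer_qs_pending = DEFER_QS_PENDING;
		raise_work();
	}
}

static void handle_work(void)
{
	/* Handler runs, reports the deferred QS, and re-arms the flag. */
	defer_qs_pending = DEFER_QS_IDLE;
}

int main(void)
{
	/* Repeated unlock-special events before the handler runs ... */
	for (int i = 0; i < 5; i++)
		request_deferred_qs();
	handle_work();
	request_deferred_qs();

	/* ... result in only two actual raises, not six. */
	printf("work raised %d times\n", raised);
	return 0;
}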