From 5d0f5953b60f5f7a278085b55ddc73e2932f4c33 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 Sep 2022 12:09:30 -0700 Subject: srcu: Convert ->srcu_lock_count and ->srcu_unlock_count to atomic NMI-safe variants of srcu_read_lock() and srcu_read_unlock() are needed by printk(), which on many architectures entails read-modify-write atomic operations. This commit prepares Tree SRCU for this change by making both ->srcu_lock_count and ->srcu_unlock_count by atomic_long_t. [ paulmck: Apply feedback from John Ogness. ] Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/ Signed-off-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: John Ogness Cc: Petr Mladek --- include/linux/srcutree.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index e3014319d1ad..0c4eca07d78d 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -23,8 +23,8 @@ struct srcu_struct; */ struct srcu_data { /* Read-side state. */ - unsigned long srcu_lock_count[2]; /* Locks per CPU. */ - unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ + atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ + atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; -- cgit v1.2.3 From b5ad0d2e8832c770b3ff2887ae37b47f42a3ab82 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 15 Sep 2022 16:38:24 +0800 Subject: rcu: Remove unused 'cpu' in rcu_virt_note_context_switch() This commit removes the unused function argument 'cpu'. This does not change functionality, but might save a cycle or two. Signed-off-by: Zeng Heng Acked-by: Mukesh Ojha Signed-off-by: Paul E. McKenney --- include/linux/kvm_host.h | 2 +- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 32f259fa5801..381b92d146c7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -416,7 +416,7 @@ static __always_inline void guest_context_enter_irqoff(void) */ if (!context_tracking_guest_enter()) { instrumentation_begin(); - rcu_virt_note_context_switch(smp_processor_id()); + rcu_virt_note_context_switch(); instrumentation_end(); } } diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 768196a5f39d..9bc025aa79a3 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -142,7 +142,7 @@ static inline int rcu_needs_cpu(void) * Take advantage of the fact that there is only one CPU, which * allows us to ignore virtualization-based context switches. */ -static inline void rcu_virt_note_context_switch(int cpu) { } +static inline void rcu_virt_note_context_switch(void) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } static inline void rcu_irq_exit_check_preempt(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5efb51486e8a..70795386b9ff 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -27,7 +27,7 @@ void rcu_cpu_stall_reset(void); * wrapper around rcu_note_context_switch(), which allows TINY_RCU * to save a few bytes. The caller must have disabled interrupts. */ -static inline void rcu_virt_note_context_switch(int cpu) +static inline void rcu_virt_note_context_switch(void) { rcu_note_context_switch(false); } -- cgit v1.2.3 From 2e83b879fb91dafe995967b46a1d38a5b0889242 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 15 Sep 2022 14:29:07 -0700 Subject: srcu: Create an srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() On strict load-store architectures, the use of this_cpu_inc() by srcu_read_lock() and srcu_read_unlock() is not NMI-safe in TREE SRCU. To see this suppose that an NMI arrives in the middle of srcu_read_lock(), just after it has read ->srcu_lock_count, but before it has written the incremented value back to memory. If that NMI handler also does srcu_read_lock() and srcu_read_lock() on that same srcu_struct structure, then upon return from that NMI handler, the interrupted srcu_read_lock() will overwrite the NMI handler's update to ->srcu_lock_count, but leave unchanged the NMI handler's update by srcu_read_unlock() to ->srcu_unlock_count. This can result in a too-short SRCU grace period, which can in turn result in arbitrary memory corruption. If the NMI handler instead interrupts the srcu_read_unlock(), this can result in eternal SRCU grace periods, which is not much better. This commit therefore creates a pair of new srcu_read_lock_nmisafe() and srcu_read_unlock_nmisafe() functions, which allow SRCU readers in both NMI handlers and in process and IRQ context. It is bad practice to mix the existing and the new _nmisafe() primitives on the same srcu_struct structure. Use one set or the other, not both. Just to underline that "bad practice" point, using srcu_read_lock() at process level and srcu_read_lock_nmisafe() in your NMI handler will not, repeat NOT, work. If you do not immediately understand why this is the case, please review the earlier paragraphs in this commit log. [ paulmck: Apply kernel test robot feedback. ] [ paulmck: Apply feedback from Randy Dunlap. ] [ paulmck: Apply feedback from John Ogness. ] [ paulmck: Apply feedback from Frederic Weisbecker. ] Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/ Signed-off-by: Paul E. McKenney Acked-by: Randy Dunlap # build-tested Reviewed-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: John Ogness Cc: Petr Mladek --- arch/Kconfig | 3 +++ include/linux/srcu.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcu/Kconfig | 3 +++ kernel/rcu/rcutorture.c | 11 +++++++++-- kernel/rcu/srcutree.c | 43 +++++++++++++++++++++++++++++++++++++---- 5 files changed, 105 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 8f138e580d1a..6b95244c3057 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -468,6 +468,9 @@ config ARCH_WANT_IRQS_OFF_ACTIVATE_MM config ARCH_HAVE_NMI_SAFE_CMPXCHG bool +config ARCH_HAS_NMI_SAFE_THIS_CPU_OPS + bool + config HAVE_ALIGNED_STRUCT_PAGE bool help diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 01226e4d960a..4fac088072c3 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -64,6 +64,20 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); +#ifdef CONFIG_NEED_SRCU_NMI_SAFE +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); +#else +static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +{ + return __srcu_read_lock(ssp); +} +static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +{ + __srcu_read_unlock(ssp, idx); +} +#endif /* CONFIG_NEED_SRCU_NMI_SAFE */ + #ifdef CONFIG_SRCU void srcu_init(void); #else /* #ifdef CONFIG_SRCU */ @@ -166,6 +180,25 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) return retval; } +/** + * srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure. + * @ssp: srcu_struct in which to register the new reader. + * + * Enter an SRCU read-side critical section, but in an NMI-safe manner. + * See srcu_read_lock() for more information. + */ +static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp) +{ + int retval; + + if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + retval = __srcu_read_lock_nmisafe(ssp); + else + retval = __srcu_read_lock(ssp); + rcu_lock_acquire(&(ssp)->dep_map); + return retval; +} + /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) @@ -191,6 +224,24 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __srcu_read_unlock(ssp, idx); } +/** + * srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure. + * @ssp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Exit an SRCU read-side critical section, but in an NMI-safe manner. + */ +static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) + __releases(ssp) +{ + WARN_ON_ONCE(idx & ~0x1); + rcu_lock_release(&(ssp)->dep_map); + if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) + __srcu_read_unlock_nmisafe(ssp, idx); + else + __srcu_read_unlock(ssp, idx); +} + /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index d471d22a5e21..f53ad63b2bc6 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -72,6 +72,9 @@ config TREE_SRCU help This option selects the full-fledged version of SRCU. +config NEED_SRCU_NMI_SAFE + def_bool HAVE_NMI && !ARCH_HAS_NMI_SAFE_THIS_CPU_OPS && !TINY_SRCU + config TASKS_RCU_GENERIC def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU select SRCU diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 503c2aa845a4..b4c74ce10225 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -615,10 +615,14 @@ static struct rcu_torture_ops rcu_busted_ops = { DEFINE_STATIC_SRCU(srcu_ctl); static struct srcu_struct srcu_ctld; static struct srcu_struct *srcu_ctlp = &srcu_ctl; +static struct rcu_torture_ops srcud_ops; static int srcu_torture_read_lock(void) __acquires(srcu_ctlp) { - return srcu_read_lock(srcu_ctlp); + if (cur_ops == &srcud_ops) + return srcu_read_lock_nmisafe(srcu_ctlp); + else + return srcu_read_lock(srcu_ctlp); } static void @@ -642,7 +646,10 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp) { - srcu_read_unlock(srcu_ctlp, idx); + if (cur_ops == &srcud_ops) + srcu_read_unlock_nmisafe(srcu_ctlp, idx); + else + srcu_read_unlock(srcu_ctlp, idx); } static int torture_srcu_read_lock_held(void) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 25e9458da6a2..32a94b254d29 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -654,6 +654,41 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) } EXPORT_SYMBOL_GPL(__srcu_read_unlock); +#ifdef CONFIG_NEED_SRCU_NMI_SAFE + +/* + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct, but in an NMI-safe manner using RMW atomics. + * Returns an index that must be passed to the matching srcu_read_unlock(). + */ +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +{ + int idx; + struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); + + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + atomic_long_inc(&sdp->srcu_lock_count[idx]); + smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ + return idx; +} +EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); + +/* + * Removes the count for the old reader from the appropriate per-CPU + * element of the srcu_struct. Note that this may well be a different + * CPU than that which was incremented by the corresponding srcu_read_lock(). + */ +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +{ + struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); + + smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */ + atomic_long_inc(&sdp->srcu_unlock_count[idx]); +} +EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); + +#endif // CONFIG_NEED_SRCU_NMI_SAFE + /* * Start an SRCU grace period. */ @@ -1090,7 +1125,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, int ss_state; check_init_srcu_struct(ssp); - idx = srcu_read_lock(ssp); + idx = __srcu_read_lock_nmisafe(ssp); ss_state = smp_load_acquire(&ssp->srcu_size_state); if (ss_state < SRCU_SIZE_WAIT_CALL) sdp = per_cpu_ptr(ssp->sda, 0); @@ -1123,7 +1158,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) srcu_funnel_exp_start(ssp, sdp_mynode, s); - srcu_read_unlock(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx); return s; } @@ -1427,13 +1462,13 @@ void srcu_barrier(struct srcu_struct *ssp) /* Initial count prevents reaching zero until all CBs are posted. */ atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); - idx = srcu_read_lock(ssp); + idx = __srcu_read_lock_nmisafe(ssp); if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0)); else for_each_possible_cpu(cpu) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); - srcu_read_unlock(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx); /* Remove the initial count, at which point reaching zero can happen. */ if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) -- cgit v1.2.3 From 27120e7d2c4d5c438b76f9c6330037a52ad0722e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 19 Sep 2022 14:03:07 -0700 Subject: srcu: Check for consistent per-CPU per-srcu_struct NMI safety This commit adds runtime checks to verify that a given srcu_struct uses consistent NMI-safe (or not) read-side primitives on a per-CPU basis. Link: https://lore.kernel.org/all/20220910221947.171557773@linutronix.de/ Signed-off-by: Paul E. McKenney Reviewed-by: Frederic Weisbecker Cc: Thomas Gleixner Cc: John Ogness Cc: Petr Mladek --- include/linux/srcu.h | 12 ++++++------ include/linux/srcutree.h | 5 +++++ kernel/rcu/srcutree.c | 38 ++++++++++++++++++++++++++++++++------ 3 files changed, 43 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4fac088072c3..1a7840c1b87a 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -65,14 +65,14 @@ unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); #ifdef CONFIG_NEED_SRCU_NMI_SAFE -int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); -void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) __acquires(ssp); +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) __releases(ssp); #else -static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) { return __srcu_read_lock(ssp); } -static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) { __srcu_read_unlock(ssp, idx); } @@ -192,7 +192,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp int retval; if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - retval = __srcu_read_lock_nmisafe(ssp); + retval = __srcu_read_lock_nmisafe(ssp, true); else retval = __srcu_read_lock(ssp); rcu_lock_acquire(&(ssp)->dep_map); @@ -237,7 +237,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) WARN_ON_ONCE(idx & ~0x1); rcu_lock_release(&(ssp)->dep_map); if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - __srcu_read_unlock_nmisafe(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx, true); else __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 0c4eca07d78d..1ef8f2a4884f 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -25,6 +25,7 @@ struct srcu_data { /* Read-side state. */ atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ + int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; @@ -42,6 +43,10 @@ struct srcu_data { struct srcu_struct *ssp; }; +#define SRCU_NMI_UNKNOWN 0x0 +#define SRCU_NMI_NMI_UNSAFE 0x1 +#define SRCU_NMI_NMI_SAFE 0x2 + /* * Node in SRCU combining tree, similar in function to rcu_data. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 32a94b254d29..30575864fcfa 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -626,6 +626,26 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +/* + * Check for consistent NMI safety. + */ +static void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe) +{ + int nmi_safe_mask = 1 << nmi_safe; + int old_nmi_safe_mask; + struct srcu_data *sdp; + + if (!IS_ENABLED(CONFIG_PROVE_RCU)) + return; + sdp = raw_cpu_ptr(ssp->sda); + old_nmi_safe_mask = READ_ONCE(sdp->srcu_nmi_safety); + if (!old_nmi_safe_mask) { + WRITE_ONCE(sdp->srcu_nmi_safety, nmi_safe_mask); + return; + } + WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask); +} + /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. @@ -638,6 +658,7 @@ int __srcu_read_lock(struct srcu_struct *ssp) idx = READ_ONCE(ssp->srcu_idx) & 0x1; this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); smp_mb(); /* B */ /* Avoid leaking the critical section. */ + srcu_check_nmi_safety(ssp, false); return idx; } EXPORT_SYMBOL_GPL(__srcu_read_lock); @@ -651,6 +672,7 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); + srcu_check_nmi_safety(ssp, false); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -661,7 +683,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); * srcu_struct, but in an NMI-safe manner using RMW atomics. * Returns an index that must be passed to the matching srcu_read_unlock(). */ -int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) { int idx; struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); @@ -669,6 +691,8 @@ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) idx = READ_ONCE(ssp->srcu_idx) & 0x1; atomic_long_inc(&sdp->srcu_lock_count[idx]); smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ + if (chknmisafe) + srcu_check_nmi_safety(ssp, true); return idx; } EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); @@ -678,12 +702,14 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). */ -void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) { struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */ atomic_long_inc(&sdp->srcu_unlock_count[idx]); + if (chknmisafe) + srcu_check_nmi_safety(ssp, true); } EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); @@ -1125,7 +1151,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, int ss_state; check_init_srcu_struct(ssp); - idx = __srcu_read_lock_nmisafe(ssp); + idx = __srcu_read_lock_nmisafe(ssp, false); ss_state = smp_load_acquire(&ssp->srcu_size_state); if (ss_state < SRCU_SIZE_WAIT_CALL) sdp = per_cpu_ptr(ssp->sda, 0); @@ -1158,7 +1184,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) srcu_funnel_exp_start(ssp, sdp_mynode, s); - __srcu_read_unlock_nmisafe(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx, false); return s; } @@ -1462,13 +1488,13 @@ void srcu_barrier(struct srcu_struct *ssp) /* Initial count prevents reaching zero until all CBs are posted. */ atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); - idx = __srcu_read_lock_nmisafe(ssp); + idx = __srcu_read_lock_nmisafe(ssp, false); if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0)); else for_each_possible_cpu(cpu) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); - __srcu_read_unlock_nmisafe(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx, false); /* Remove the initial count, at which point reaching zero can happen. */ if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) -- cgit v1.2.3 From e9f8a790bf682bb4a2f79d0d905b34d55bceb71d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Sep 2022 08:57:56 -0700 Subject: slab: Explain why SLAB_TYPESAFE_BY_RCU reference before locking It is not obvious to the casual user why it is absolutely necessary to acquire a reference to a SLAB_TYPESAFE_BY_RCU structure before acquiring a lock in that structure. Therefore, add a comment explaining this point. [ paulmck: Apply Vlastimil Babka feedback. ] Signed-off-by: Paul E. McKenney Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Roman Gushchin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Acked-by: Vlastimil Babka --- include/linux/slab.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 90877fcde70b..487418c7ea8c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -76,6 +76,17 @@ * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. * + * Note that it is not possible to acquire a lock within a structure + * allocated with SLAB_TYPESAFE_BY_RCU without first acquiring a reference + * as described above. The reason is that SLAB_TYPESAFE_BY_RCU pages + * are not zeroed before being given to the slab, which means that any + * locks must be initialized after each and every kmem_struct_alloc(). + * Alternatively, make the ctor passed to kmem_cache_create() initialize + * the locks at page-allocation time, as is done in __i915_request_ctor(), + * sighand_ctor(), and anon_vma_ctor(). Such a ctor permits readers + * to safely acquire those ctor-initialized locks under rcu_read_lock() + * protection. + * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ -- cgit v1.2.3 From fdbdb868454a3e83996cf1500c6f7ba73c07a03c Mon Sep 17 00:00:00 2001 From: Yipeng Zou Date: Mon, 26 Sep 2022 09:58:27 +0800 Subject: rcu: Remove rcu_is_idle_cpu() The commit 3fcd6a230fa7 ("x86/cpu: Avoid cpuinfo-induced IPIing of idle CPUs") introduced rcu_is_idle_cpu() in order to identify the current CPU idle state. But commit f3eca381bd49 ("x86/aperfmperf: Replace arch_freq_get_on_cpu()") switched to using MAX_SAMPLE_AGE, so rcu_is_idle_cpu() is no longer used. This commit therefore removes it. Fixes: f3eca381bd49 ("x86/aperfmperf: Replace arch_freq_get_on_cpu()") Signed-off-by: Yipeng Zou Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 2 -- include/linux/rcutree.h | 2 -- kernel/rcu/tree.c | 6 ------ 3 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 9bc025aa79a3..5c271bf3a1e7 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -146,8 +146,6 @@ static inline void rcu_virt_note_context_switch(void) { } static inline void rcu_cpu_stall_reset(void) { } static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; } static inline void rcu_irq_exit_check_preempt(void) { } -#define rcu_is_idle_cpu(cpu) \ - (is_idle_task(current) && !in_nmi() && !in_hardirq() && !in_serving_softirq()) static inline void exit_rcu(void) { } static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t) { diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 70795386b9ff..4003bf6cfa1c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -87,8 +87,6 @@ bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); void cond_synchronize_rcu(unsigned long oldstate); void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp); -bool rcu_is_idle_cpu(int cpu); - #ifdef CONFIG_PROVE_RCU void rcu_irq_exit_check_preempt(void); #else diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5ec97e3f7468..f6561aa401c0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -301,12 +301,6 @@ static bool rcu_dynticks_in_eqs(int snap) return !(snap & RCU_DYNTICKS_IDX); } -/* Return true if the specified CPU is currently idle from an RCU viewpoint. */ -bool rcu_is_idle_cpu(int cpu) -{ - return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)); -} - /* * Return true if the CPU corresponding to the specified rcu_data * structure has spent some time in an extended quiescent state since -- cgit v1.2.3 From e29a4915db1480f96e0bc2e928699d086a71f43c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 13 Oct 2022 19:22:44 +0200 Subject: srcu: Debug NMI safety even on archs that don't require it Currently the NMI safety debugging is only performed on architectures that don't support NMI-safe this_cpu_inc(). Reorder the code so that other architectures like x86 also detect bad uses. [ paulmck: Apply kernel test robot, Stephen Rothwell, and Zqiang feedback. ] Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 36 ++++++++++++++++++++++++------------ include/linux/srcutree.h | 4 ---- kernel/rcu/srcutree.c | 25 ++++++++++--------------- 3 files changed, 34 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 1a7840c1b87a..f0814ffca34b 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -65,14 +65,14 @@ unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); #ifdef CONFIG_NEED_SRCU_NMI_SAFE -int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) __acquires(ssp); -void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) __releases(ssp); +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); #else -static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) +static inline int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) { return __srcu_read_lock(ssp); } -static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) +static inline void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) { __srcu_read_unlock(ssp, idx); } @@ -118,6 +118,18 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#define SRCU_NMI_UNKNOWN 0x0 +#define SRCU_NMI_UNSAFE 0x1 +#define SRCU_NMI_SAFE 0x2 + +#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU) +void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe); +#else +static inline void srcu_check_nmi_safety(struct srcu_struct *ssp, + bool nmi_safe) { } +#endif + + /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing @@ -175,6 +187,7 @@ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); rcu_lock_acquire(&(ssp)->dep_map); return retval; @@ -191,10 +204,8 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp { int retval; - if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - retval = __srcu_read_lock_nmisafe(ssp, true); - else - retval = __srcu_read_lock(ssp); + srcu_check_nmi_safety(ssp, true); + retval = __srcu_read_lock_nmisafe(ssp); rcu_lock_acquire(&(ssp)->dep_map); return retval; } @@ -205,6 +216,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; + srcu_check_nmi_safety(ssp, false); retval = __srcu_read_lock(ssp); return retval; } @@ -220,6 +232,7 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, false); rcu_lock_release(&(ssp)->dep_map); __srcu_read_unlock(ssp, idx); } @@ -235,17 +248,16 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp) { WARN_ON_ONCE(idx & ~0x1); + srcu_check_nmi_safety(ssp, true); rcu_lock_release(&(ssp)->dep_map); - if (IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE)) - __srcu_read_unlock_nmisafe(ssp, idx, true); - else - __srcu_read_unlock(ssp, idx); + __srcu_read_unlock_nmisafe(ssp, idx); } /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { + srcu_check_nmi_safety(ssp, false); __srcu_read_unlock(ssp, idx); } diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 1ef8f2a4884f..c689a81752c9 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -43,10 +43,6 @@ struct srcu_data { struct srcu_struct *ssp; }; -#define SRCU_NMI_UNKNOWN 0x0 -#define SRCU_NMI_NMI_UNSAFE 0x1 -#define SRCU_NMI_NMI_SAFE 0x2 - /* * Node in SRCU combining tree, similar in function to rcu_data. */ diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 272830a87e56..ca4b5dcec675 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -631,17 +631,16 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); +#ifdef CONFIG_PROVE_RCU /* * Check for consistent NMI safety. */ -static void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe) +void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe) { int nmi_safe_mask = 1 << nmi_safe; int old_nmi_safe_mask; struct srcu_data *sdp; - if (!IS_ENABLED(CONFIG_PROVE_RCU)) - return; /* NMI-unsafe use in NMI is a bad sign */ WARN_ON_ONCE(!nmi_safe && in_nmi()); sdp = raw_cpu_ptr(ssp->sda); @@ -652,6 +651,8 @@ static void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe) } WARN_ONCE(old_nmi_safe_mask != nmi_safe_mask, "CPU %d old state %d new state %d\n", sdp->cpu, old_nmi_safe_mask, nmi_safe_mask); } +EXPORT_SYMBOL_GPL(srcu_check_nmi_safety); +#endif /* CONFIG_PROVE_RCU */ /* * Counts the new reader in the appropriate per-CPU element of the @@ -665,7 +666,6 @@ int __srcu_read_lock(struct srcu_struct *ssp) idx = READ_ONCE(ssp->srcu_idx) & 0x1; this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); smp_mb(); /* B */ /* Avoid leaking the critical section. */ - srcu_check_nmi_safety(ssp, false); return idx; } EXPORT_SYMBOL_GPL(__srcu_read_lock); @@ -679,7 +679,6 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); - srcu_check_nmi_safety(ssp, false); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -690,7 +689,7 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); * srcu_struct, but in an NMI-safe manner using RMW atomics. * Returns an index that must be passed to the matching srcu_read_unlock(). */ -int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) +int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) { int idx; struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); @@ -698,8 +697,6 @@ int __srcu_read_lock_nmisafe(struct srcu_struct *ssp, bool chknmisafe) idx = READ_ONCE(ssp->srcu_idx) & 0x1; atomic_long_inc(&sdp->srcu_lock_count[idx]); smp_mb__after_atomic(); /* B */ /* Avoid leaking the critical section. */ - if (chknmisafe) - srcu_check_nmi_safety(ssp, true); return idx; } EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); @@ -709,14 +706,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). */ -void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx, bool chknmisafe) +void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) { struct srcu_data *sdp = raw_cpu_ptr(ssp->sda); smp_mb__before_atomic(); /* C */ /* Avoid leaking the critical section. */ atomic_long_inc(&sdp->srcu_unlock_count[idx]); - if (chknmisafe) - srcu_check_nmi_safety(ssp, true); } EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); @@ -1163,7 +1158,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, * SRCU read-side critical section so that the grace-period * sequence number cannot wrap around in the meantime. */ - idx = __srcu_read_lock_nmisafe(ssp, false); + idx = __srcu_read_lock_nmisafe(ssp); ss_state = smp_load_acquire(&ssp->srcu_size_state); if (ss_state < SRCU_SIZE_WAIT_CALL) sdp = per_cpu_ptr(ssp->sda, 0); @@ -1196,7 +1191,7 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) srcu_funnel_exp_start(ssp, sdp_mynode, s); - __srcu_read_unlock_nmisafe(ssp, idx, false); + __srcu_read_unlock_nmisafe(ssp, idx); return s; } @@ -1500,13 +1495,13 @@ void srcu_barrier(struct srcu_struct *ssp) /* Initial count prevents reaching zero until all CBs are posted. */ atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); - idx = __srcu_read_lock_nmisafe(ssp, false); + idx = __srcu_read_lock_nmisafe(ssp); if (smp_load_acquire(&ssp->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, 0)); else for_each_possible_cpu(cpu) srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); - __srcu_read_unlock_nmisafe(ssp, idx, false); + __srcu_read_unlock_nmisafe(ssp, idx); /* Remove the initial count, at which point reaching zero can happen. */ if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) -- cgit v1.2.3 From f733615e39aa2d6ddeef33b7b2c9aa6a5a2c2785 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 7 Nov 2022 15:21:59 +0106 Subject: rcu: Implement lockdep_rcu_enabled for !CONFIG_DEBUG_LOCK_ALLOC Provide an implementation for debug_lockdep_rcu_enabled() when CONFIG_DEBUG_LOCK_ALLOC is not enabled. This allows code to check if rcu lockdep debugging is available without needing an extra check if CONFIG_DEBUG_LOCK_ALLOC is enabled. Signed-off-by: John Ogness Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d..65178c40ab6f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -340,6 +340,11 @@ static inline int rcu_read_lock_any_held(void) return !preemptible(); } +static inline int debug_lockdep_rcu_enabled(void) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ #ifdef CONFIG_PROVE_RCU -- cgit v1.2.3 From 3cb278e73be58bfb780ecd55129296d2f74c1fb7 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Sun, 16 Oct 2022 16:22:54 +0000 Subject: rcu: Make call_rcu() lazy to save power Implement timer-based RCU callback batching (also known as lazy callbacks). With this we save about 5-10% of power consumed due to RCU requests that happen when system is lightly loaded or idle. By default, all async callbacks (queued via call_rcu) are marked lazy. An alternate API call_rcu_hurry() is provided for the few users, for example synchronize_rcu(), that need the old behavior. The batch is flushed whenever a certain amount of time has passed, or the batch on a particular CPU grows too big. Also memory pressure will flush it in a future patch. To handle several corner cases automagically (such as rcu_barrier() and hotplug), we re-use bypass lists which were originally introduced to address lock contention, to handle lazy CBs as well. The bypass list length has the lazy CB length included in it. A separate lazy CB length counter is also introduced to keep track of the number of lazy CBs. [ paulmck: Fix formatting of inline call_rcu_lazy() definition. ] [ paulmck: Apply Zqiang feedback. ] [ paulmck: Apply s/call_rcu_flush/call_rcu_hurry/ feedback from Tejun Heo. ] Suggested-by: Paul McKenney Acked-by: Frederic Weisbecker Signed-off-by: Joel Fernandes (Google) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 9 +++ kernel/rcu/Kconfig | 8 +++ kernel/rcu/rcu.h | 8 +++ kernel/rcu/tiny.c | 2 +- kernel/rcu/tree.c | 129 ++++++++++++++++++++++++-------------- kernel/rcu/tree.h | 11 ++-- kernel/rcu/tree_exp.h | 2 +- kernel/rcu/tree_nocb.h | 159 ++++++++++++++++++++++++++++++++++++++--------- 8 files changed, 246 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 08605ce7379d..611c11383d23 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void) #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_RCU_LAZY +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func); +#else +static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + call_rcu(head, func); +} +#endif + /* Internal to kernel */ void rcu_init(void); extern int rcu_scheduler_active; diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index d471d22a5e21..d78f6181c8aa 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -311,4 +311,12 @@ config TASKS_TRACE_RCU_READ_MB Say N here if you hate read-side memory barriers. Take the default if you are unsure. +config RCU_LAZY + bool "RCU callback lazy invocation functionality" + depends on RCU_NOCB_CPU + default n + help + To save power, batch RCU callbacks and flush after delay, memory + pressure, or callback list growing too big. + endmenu # "RCU Subsystem" diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index be5979da07f5..65704cbc9df7 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -474,6 +474,14 @@ enum rcutorture_type { INVALID_RCU_FLAVOR }; +#if defined(CONFIG_RCU_LAZY) +unsigned long rcu_lazy_get_jiffies_till_flush(void); +void rcu_lazy_set_jiffies_till_flush(unsigned long j); +#else +static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; } +static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { } +#endif + #if defined(CONFIG_TREE_RCU) void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, unsigned long *gp_seq); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index a33a8d4942c3..72913ce21258 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = { void rcu_barrier(void) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); } EXPORT_SYMBOL(rcu_barrier); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index fb7a1b95af71..4b68e50312d9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2728,47 +2728,8 @@ static void check_cb_ovld(struct rcu_data *rdp) raw_spin_unlock_rcu_node(rnp); } -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. - * - * RCU read-side critical sections are delimited by rcu_read_lock() - * and rcu_read_unlock(), and may be nested. In addition, but only in - * v5.0 and later, regions of code across which interrupts, preemption, - * or softirqs have been disabled also serve as RCU read-side critical - * sections. This includes hardware interrupt handlers, softirq handlers, - * and NMI handlers. - * - * Note that all CPUs must agree that the grace period extended beyond - * all pre-existing RCU read-side critical section. On systems with more - * than one CPU, this means that when "func()" is invoked, each CPU is - * guaranteed to have executed a full memory barrier since the end of its - * last RCU read-side critical section whose beginning preceded the call - * to call_rcu(). It also means that each CPU executing an RCU read-side - * critical section that continues beyond the start of "func()" must have - * executed a memory barrier after the call_rcu() but before the beginning - * of that RCU read-side critical section. Note that these guarantees - * include CPUs that are offline, idle, or executing in user mode, as - * well as CPUs that are executing in the kernel. - * - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the - * resulting RCU callback function "func()", then both CPU A and CPU B are - * guaranteed to execute a full memory barrier during the time interval - * between the call to call_rcu() and the invocation of "func()" -- even - * if CPU A and CPU B are the same CPU (but again only if the system has - * more than one CPU). - * - * Implementation of these memory-ordering guarantees is described here: - * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. - */ -void call_rcu(struct rcu_head *head, rcu_callback_t func) +static void +__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy) { static atomic_t doublefrees; unsigned long flags; @@ -2809,7 +2770,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) } check_cb_ovld(rdp); - if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags)) + if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) return; // Enqueued onto ->nocb_bypass, so just leave. // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock. rcu_segcblist_enqueue(&rdp->cblist, head); @@ -2831,8 +2792,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) local_irq_restore(flags); } } -EXPORT_SYMBOL_GPL(call_rcu); +#ifdef CONFIG_RCU_LAZY +/** + * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and + * flush all lazy callbacks (including the new one) to the main ->cblist while + * doing so. + * + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. + * + * Use this API instead of call_rcu() if you don't want the callback to be + * invoked after very long periods of time, which can happen on systems without + * memory pressure and on systems which are lightly loaded or mostly idle. + * This function will cause callbacks to be invoked sooner than later at the + * expense of extra power. Other than that, this function is identical to, and + * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory + * ordering and other functionality. + */ +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) +{ + return __call_rcu_common(head, func, false); +} +EXPORT_SYMBOL_GPL(call_rcu_hurry); +#endif + +/** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * By default the callbacks are 'lazy' and are kept hidden from the main + * ->cblist to prevent starting of grace periods too soon. + * If you desire grace periods to start very soon, use call_rcu_hurry(). + * + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. + * + * RCU read-side critical sections are delimited by rcu_read_lock() + * and rcu_read_unlock(), and may be nested. In addition, but only in + * v5.0 and later, regions of code across which interrupts, preemption, + * or softirqs have been disabled also serve as RCU read-side critical + * sections. This includes hardware interrupt handlers, softirq handlers, + * and NMI handlers. + * + * Note that all CPUs must agree that the grace period extended beyond + * all pre-existing RCU read-side critical section. On systems with more + * than one CPU, this means that when "func()" is invoked, each CPU is + * guaranteed to have executed a full memory barrier since the end of its + * last RCU read-side critical section whose beginning preceded the call + * to call_rcu(). It also means that each CPU executing an RCU read-side + * critical section that continues beyond the start of "func()" must have + * executed a memory barrier after the call_rcu() but before the beginning + * of that RCU read-side critical section. Note that these guarantees + * include CPUs that are offline, idle, or executing in user mode, as + * well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the + * resulting RCU callback function "func()", then both CPU A and CPU B are + * guaranteed to execute a full memory barrier during the time interval + * between the call to call_rcu() and the invocation of "func()" -- even + * if CPU A and CPU B are the same CPU (but again only if the system has + * more than one CPU). + * + * Implementation of these memory-ordering guarantees is described here: + * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst. + */ +void call_rcu(struct rcu_head *head, rcu_callback_t func) +{ + return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); +} +EXPORT_SYMBOL_GPL(call_rcu); /* Maximum number of jiffies to wait before draining a batch. */ #define KFREE_DRAIN_JIFFIES (5 * HZ) @@ -3507,7 +3544,7 @@ void synchronize_rcu(void) if (rcu_gp_is_expedited()) synchronize_rcu_expedited(); else - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); return; } @@ -3910,7 +3947,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp) * if it's fully lazy. */ was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist); if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) { atomic_inc(&rcu_state.barrier_cpu_count); @@ -4336,7 +4373,7 @@ void rcutree_migrate_callbacks(int cpu) my_rdp = this_cpu_ptr(&rcu_data); my_rnp = my_rdp->mynode; rcu_nocb_lock(my_rdp); /* irqs already disabled. */ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false)); raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */ /* Leverage recent GPs and set GP for new callbacks. */ needwake = rcu_advance_cbs(my_rnp, rdp) || diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 925dd98f8b23..fcb5d696eb17 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -263,14 +263,16 @@ struct rcu_data { unsigned long last_fqs_resched; /* Time of last rcu_resched(). */ unsigned long last_sched_clock; /* Jiffies of last rcu_sched_clock_irq(). */ + long lazy_len; /* Length of buffered lazy callbacks. */ int cpu; }; /* Values for nocb_defer_wakeup field in struct rcu_data. */ #define RCU_NOCB_WAKE_NOT 0 #define RCU_NOCB_WAKE_BYPASS 1 -#define RCU_NOCB_WAKE 2 -#define RCU_NOCB_WAKE_FORCE 3 +#define RCU_NOCB_WAKE_LAZY 2 +#define RCU_NOCB_WAKE 3 +#define RCU_NOCB_WAKE_FORCE 4 #define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) /* For jiffies_till_first_fqs and */ @@ -441,9 +443,10 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); static void rcu_init_one_nocb(struct rcu_node *rnp); static bool wake_nocb_gp(struct rcu_data *rdp, bool force); static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j); + unsigned long j, bool lazy); static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags); + bool *was_alldone, unsigned long flags, + bool lazy); static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, unsigned long flags); static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 18e9b4cd78ef..ed6c3cce28f2 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -937,7 +937,7 @@ void synchronize_rcu_expedited(void) /* If expedited grace periods are prohibited, fall back to normal. */ if (rcu_gp_is_normal()) { - wait_rcu_gp(call_rcu); + wait_rcu_gp(call_rcu_hurry); return; } diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 094fd454b6c3..d6e4c076b051 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -256,6 +256,31 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) return __wake_nocb_gp(rdp_gp, rdp, force, flags); } +/* + * LAZY_FLUSH_JIFFIES decides the maximum amount of time that + * can elapse before lazy callbacks are flushed. Lazy callbacks + * could be flushed much earlier for a number of other reasons + * however, LAZY_FLUSH_JIFFIES will ensure no lazy callbacks are + * left unsubmitted to RCU after those many jiffies. + */ +#define LAZY_FLUSH_JIFFIES (10 * HZ) +static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES; + +#ifdef CONFIG_RCU_LAZY +// To be called only from test code. +void rcu_lazy_set_jiffies_till_flush(unsigned long jif) +{ + jiffies_till_flush = jif; +} +EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush); + +unsigned long rcu_lazy_get_jiffies_till_flush(void) +{ + return jiffies_till_flush; +} +EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush); +#endif + /* * Arrange to wake the GP kthread for this NOCB group at some future * time when it is safe to do so. @@ -269,10 +294,14 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); /* - * Bypass wakeup overrides previous deferments. In case - * of callback storm, no need to wake up too early. + * Bypass wakeup overrides previous deferments. In case of + * callback storms, no need to wake up too early. */ - if (waketype == RCU_NOCB_WAKE_BYPASS) { + if (waketype == RCU_NOCB_WAKE_LAZY && + rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { + mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush); + WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); + } else if (waketype == RCU_NOCB_WAKE_BYPASS) { mod_timer(&rdp_gp->nocb_timer, jiffies + 2); WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); } else { @@ -293,10 +322,13 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, * proves to be initially empty, just return false because the no-CB GP * kthread may need to be awakened in this case. * + * Return true if there was something to be flushed and it succeeded, otherwise + * false. + * * Note that this function always returns true if rhp is NULL. */ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { struct rcu_cblist rcl; @@ -310,7 +342,20 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, /* Note: ->cblist.len already accounts for ->nocb_bypass contents. */ if (rhp) rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ - rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + + /* + * If the new CB requested was a lazy one, queue it onto the main + * ->cblist so we can take advantage of a sooner grade period. + */ + if (lazy && rhp) { + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, NULL); + rcu_cblist_enqueue(&rcl, rhp); + WRITE_ONCE(rdp->lazy_len, 0); + } else { + rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp); + WRITE_ONCE(rdp->lazy_len, 0); + } + rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl); WRITE_ONCE(rdp->nocb_bypass_first, j); rcu_nocb_bypass_unlock(rdp); @@ -326,13 +371,13 @@ static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, * Note that this function always returns true if rhp is NULL. */ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { if (!rcu_rdp_is_offloaded(rdp)) return true; rcu_lockdep_assert_cblist_protected(rdp); rcu_nocb_bypass_lock(rdp); - return rcu_nocb_do_flush_bypass(rdp, rhp, j); + return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy); } /* @@ -345,7 +390,7 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) if (!rcu_rdp_is_offloaded(rdp) || !rcu_nocb_bypass_trylock(rdp)) return; - WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j)); + WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false)); } /* @@ -367,12 +412,14 @@ static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j) * there is only one CPU in operation. */ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, + bool lazy) { unsigned long c; unsigned long cur_gp_seq; unsigned long j = jiffies; long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len)); lockdep_assert_irqs_disabled(); @@ -417,25 +464,29 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, // If there hasn't yet been all that many ->cblist enqueues // this jiffy, tell the caller to enqueue onto ->cblist. But flush // ->nocb_bypass first. - if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy) { + // Lazy CBs throttle this back and do immediate bypass queuing. + if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j)); + + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false)); WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass)); return false; // Caller must enqueue the callback. } // If ->nocb_bypass has been used too long or is too full, // flush ->nocb_bypass to ->cblist. - if ((ncbs && j != READ_ONCE(rdp->nocb_bypass_first)) || + if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || + (ncbs && bypass_is_lazy && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) || ncbs >= qhimark) { rcu_nocb_lock(rdp); *was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); - if (!rcu_nocb_flush_bypass(rdp, rhp, j)) { + if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) { if (*was_alldone) trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstQ")); @@ -463,13 +514,24 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */ rcu_cblist_enqueue(&rdp->nocb_bypass, rhp); + + if (lazy) + WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1); + if (!ncbs) { WRITE_ONCE(rdp->nocb_bypass_first, j); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ")); } rcu_nocb_bypass_unlock(rdp); smp_mb(); /* Order enqueue before wake. */ - if (ncbs) { + // A wake up of the grace period kthread or timer adjustment + // needs to be done only if: + // 1. Bypass list was fully empty before (this is the first + // bypass list entry), or: + // 2. Both of these conditions are met: + // a. The bypass list previously had only lazy CBs, and: + // b. The new CB is non-lazy. + if (ncbs && (!bypass_is_lazy || lazy)) { local_irq_restore(flags); } else { // No-CBs GP kthread might be indefinitely asleep, if so, wake. @@ -497,8 +559,10 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, unsigned long flags) __releases(rdp->nocb_lock) { + long bypass_len; unsigned long cur_gp_seq; unsigned long j; + long lazy_len; long len; struct task_struct *t; @@ -512,9 +576,16 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, } // Need to actually to a wakeup. len = rcu_segcblist_n_cbs(&rdp->cblist); + bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_len = READ_ONCE(rdp->lazy_len); if (was_alldone) { rdp->qlen_last_fqs_check = len; - if (!irqs_disabled_flags(flags)) { + // Only lazy CBs in bypass list + if (lazy_len && bypass_len == lazy_len) { + rcu_nocb_unlock_irqrestore(rdp, flags); + wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazy")); + } else if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ rcu_nocb_unlock_irqrestore(rdp, flags); wake_nocb_gp(rdp, false); @@ -605,12 +676,12 @@ static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu) static void nocb_gp_wait(struct rcu_data *my_rdp) { bool bypass = false; - long bypass_ncbs; int __maybe_unused cpu = my_rdp->cpu; unsigned long cur_gp_seq; unsigned long flags; bool gotcbs = false; unsigned long j = jiffies; + bool lazy = false; bool needwait_gp = false; // This prevents actual uninitialized use. bool needwake; bool needwake_gp; @@ -640,24 +711,43 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) * won't be ignored for long. */ list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) { + long bypass_ncbs; + bool flush_bypass = false; + long lazy_ncbs; + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check")); rcu_nocb_lock_irqsave(rdp, flags); lockdep_assert_held(&rdp->nocb_lock); bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); - if (bypass_ncbs && + lazy_ncbs = READ_ONCE(rdp->lazy_len); + + if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && + (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) || + bypass_ncbs > 2 * qhimark)) { + flush_bypass = true; + } else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) || bypass_ncbs > 2 * qhimark)) { - // Bypass full or old, so flush it. - (void)rcu_nocb_try_flush_bypass(rdp, j); - bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + flush_bypass = true; } else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) { rcu_nocb_unlock_irqrestore(rdp, flags); continue; /* No callbacks here, try next. */ } + + if (flush_bypass) { + // Bypass full or old, so flush it. + (void)rcu_nocb_try_flush_bypass(rdp, j); + bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass); + lazy_ncbs = READ_ONCE(rdp->lazy_len); + } + if (bypass_ncbs) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, - TPS("Bypass")); - bypass = true; + bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass")); + if (bypass_ncbs == lazy_ncbs) + lazy = true; + else + bypass = true; } rnp = rdp->mynode; @@ -705,12 +795,20 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) my_rdp->nocb_gp_gp = needwait_gp; my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0; - if (bypass && !rcu_nocb_poll) { - // At least one child with non-empty ->nocb_bypass, so set - // timer in order to avoid stranding its callbacks. - wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, - TPS("WakeBypassIsDeferred")); + // At least one child with non-empty ->nocb_bypass, so set + // timer in order to avoid stranding its callbacks. + if (!rcu_nocb_poll) { + // If bypass list only has lazy CBs. Add a deferred lazy wake up. + if (lazy && !bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY, + TPS("WakeLazyIsDeferred")); + // Otherwise add a deferred bypass wake up. + } else if (bypass) { + wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS, + TPS("WakeBypassIsDeferred")); + } } + if (rcu_nocb_poll) { /* Polling, so trace if first poll in the series. */ if (gotcbs) @@ -1036,7 +1134,7 @@ static long rcu_nocb_rdp_deoffload(void *arg) * return false, which means that future calls to rcu_nocb_try_bypass() * will refuse to put anything into the bypass. */ - WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies)); + WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); /* * Start with invoking rcu_core() early. This way if the current thread * happens to preempt an ongoing call to rcu_core() in the middle, @@ -1278,6 +1376,7 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) raw_spin_lock_init(&rdp->nocb_gp_lock); timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); rcu_cblist_init(&rdp->nocb_bypass); + WRITE_ONCE(rdp->lazy_len, 0); mutex_init(&rdp->nocb_gp_kthread_mutex); } @@ -1564,13 +1663,13 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) } static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - unsigned long j) + unsigned long j, bool lazy) { return true; } static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, - bool *was_alldone, unsigned long flags) + bool *was_alldone, unsigned long flags, bool lazy) { return false; } -- cgit v1.2.3