From 4d9a5d4319e22670ec6d6227e12b54f361c46d0f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Oct 2012 01:47:16 +0200 Subject: rcu: Remove rcu_switch() It's only there to call rcu_user_hooks_switch(). Let's just call rcu_user_hooks_switch() directly, we don't need this function in the middle. Signed-off-by: Frederic Weisbecker Cc: Josh Triplett Cc: Peter Zijlstra Cc: Richard Weinberger Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 ++ include/linux/sched.h | 8 -------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7c968e4f929e..5d009def7c0c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -204,6 +204,8 @@ static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } static inline void rcu_user_enter_after_irq(void) { } static inline void rcu_user_exit_after_irq(void) { } +static inline void rcu_user_hooks_switch(struct task_struct *prev, + struct task_struct *next) { } #endif /* CONFIG_RCU_USER_QS */ extern void exit_rcu(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index 0dd42a02df2e..432cc5e1bbee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1844,14 +1844,6 @@ static inline void rcu_copy_process(struct task_struct *p) #endif -static inline void rcu_switch(struct task_struct *prev, - struct task_struct *next) -{ -#ifdef CONFIG_RCU_USER_QS - rcu_user_hooks_switch(prev, next); -#endif -} - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { -- cgit v1.2.3 From 4e87b2d7e887df3fe251dd7f702591a3acf369ca Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 13 Oct 2012 01:14:14 +0800 Subject: srcu: Credit Lai Jiangshan with SRCU rewrite Lai Jiangshan rewrote SRCU, so this commit ensures that he gets his proper share of blame^Wcredit. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 2 ++ kernel/srcu.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 55a5c52cbb25..a55ddb19053c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -16,8 +16,10 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Copyright (C) IBM Corporation, 2006 + * Copyright (C) Fujitsu, 2012 * * Author: Paul McKenney + * Lai Jiangshan * * For detailed explanation of Read-Copy Update mechanism see - * Documentation/RCU/ *.txt diff --git a/kernel/srcu.c b/kernel/srcu.c index 97c465ebd844..0b99f27fa2f7 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -16,8 +16,10 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Copyright (C) IBM Corporation, 2006 + * Copyright (C) Fujitsu, 2012 * * Author: Paul McKenney + * Lai Jiangshan * * For detailed explanation of Read-Copy Update mechanism see - * Documentation/RCU/ *.txt -- cgit v1.2.3 From f2ebfbc991044fd5b89d4529741d7500feb37fbd Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 13 Oct 2012 01:14:15 +0800 Subject: srcu: Export process_srcu() Because process_srcu() will be used in DEFINE_SRCU(), which is a macro that could be expanded pretty much anywhere, it can no longer be static. Note that process_srcu() is still internal to srcu.h. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 2 ++ kernel/srcu.c | 6 ++---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index a55ddb19053c..5cce128f196c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -78,6 +78,8 @@ int init_srcu_struct(struct srcu_struct *sp); #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +void process_srcu(struct work_struct *work); + /** * call_srcu() - Queue a callback for invocation after an SRCU grace period * @sp: srcu_struct in queue the callback diff --git a/kernel/srcu.c b/kernel/srcu.c index 0b99f27fa2f7..b363b0924561 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -94,9 +94,6 @@ static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from) } } -/* single-thread state-machine */ -static void process_srcu(struct work_struct *work); - static int init_srcu_struct_fields(struct srcu_struct *sp) { sp->completed = 0; @@ -639,7 +636,7 @@ static void srcu_reschedule(struct srcu_struct *sp) /* * This is the work-queue function that handles SRCU grace periods. */ -static void process_srcu(struct work_struct *work) +void process_srcu(struct work_struct *work) { struct srcu_struct *sp; @@ -650,3 +647,4 @@ static void process_srcu(struct work_struct *work) srcu_invoke_callbacks(sp); srcu_reschedule(sp); } +EXPORT_SYMBOL_GPL(process_srcu); -- cgit v1.2.3 From b637a328bd4f43a0e146d1eef0142b650ba0d644 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 Sep 2012 16:58:38 -0700 Subject: rcu: Print remote CPU's stacks in stall warnings The RCU CPU stall warnings rely on trigger_all_cpu_backtrace() to do NMI-based dump of the stack traces of all CPUs. Unfortunately, a number of architectures do not implement trigger_all_cpu_backtrace(), in which case RCU falls back to just dumping the stack of the running CPU. This is unhelpful in the case where the running CPU has detected that some other CPU has stalled. This commit therefore makes the running CPU dump the stacks of the tasks running on the stalled CPUs. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 2 ++ kernel/rcutree.c | 25 ++++++++++++++++++++++++- kernel/sched/core.c | 6 ++++++ 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0dd42a02df2e..ba69b5adea30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -109,6 +109,8 @@ extern void update_cpu_load_nohz(void); extern unsigned long get_parent_ip(unsigned long addr); +extern void dump_cpu_task(int cpu); + struct seq_file; struct cfs_rq; struct task_group; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 74df86bd9204..e78538712df0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -873,6 +873,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp) rsp->jiffies_stall = jiffies + jiffies_till_stall_check(); } +/* + * Dump stacks of all tasks running on stalled CPUs. This is a fallback + * for architectures that do not implement trigger_all_cpu_backtrace(). + * The NMI-triggered stack traces are more accurate because they are + * printed by the target CPU. + */ +static void rcu_dump_cpu_stacks(struct rcu_state *rsp) +{ + int cpu; + unsigned long flags; + struct rcu_node *rnp; + + rcu_for_each_leaf_node(rsp, rnp) { + raw_spin_lock_irqsave(&rnp->lock, flags); + if (rnp->qsmask != 0) { + for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) + if (rnp->qsmask & (1UL << cpu)) + dump_cpu_task(rnp->grplo + cpu); + } + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } +} + static void print_other_cpu_stall(struct rcu_state *rsp) { int cpu; @@ -929,7 +952,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) if (ndetected == 0) printk(KERN_ERR "INFO: Stall ended before state dump start\n"); else if (!trigger_all_cpu_backtrace()) - dump_stack(); + rcu_dump_cpu_stacks(rsp); /* Complain about tasks blocking the grace period. */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2d8927fda712..59d08fb1a9e3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8076,3 +8076,9 @@ struct cgroup_subsys cpuacct_subsys = { .base_cftypes = files, }; #endif /* CONFIG_CGROUP_CPUACCT */ + +void dump_cpu_task(int cpu) +{ + pr_info("Task dump for CPU %d:\n", cpu); + sched_show_task(cpu_curr(cpu)); +} -- cgit v1.2.3 From 55c6659afaa6fd79a3b5a7c2b42bb87e0c11209d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 13 Oct 2012 01:14:16 +0800 Subject: srcu: Add DEFINE_SRCU() In old days, we had two different API sets for dynamic-allocated per-CPU data and DEFINE_PER_CPU()-defined per_cpu data, and because SRCU used dynamic-allocated per-CPU data, its srcu_struct structures cannot be declared statically. This commit therefore introduces DEFINE_SRCU() and DEFINE_STATIC_SRCU() to allow statically declared SRCU structures, using the new static per-CPU interfaces. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney [ paulmck: Updated for __DELAYED_WORK_INITIALIZER() added argument, fixed whitespace issue. ] --- include/linux/srcu.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 5cce128f196c..6eb691b08358 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -42,6 +42,8 @@ struct rcu_batch { struct rcu_head *head, **tail; }; +#define RCU_BATCH_INIT(name) { NULL, &(name.head) } + struct srcu_struct { unsigned completed; struct srcu_struct_array __percpu *per_cpu_ref; @@ -72,14 +74,42 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name, __init_srcu_struct((sp), #sp, &__srcu_key); \ }) +#define __SRCU_DEP_MAP_INIT(srcu_name) .dep_map = { .name = #srcu_name }, #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ int init_srcu_struct(struct srcu_struct *sp); +#define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ void process_srcu(struct work_struct *work); +#define __SRCU_STRUCT_INIT(name) \ + { \ + .completed = -300, \ + .per_cpu_ref = &name##_srcu_array, \ + .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ + .running = false, \ + .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ + .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ + .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ + .batch_done = RCU_BATCH_INIT(name.batch_done), \ + .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ + __SRCU_DEP_MAP_INIT(name) \ + } + +/* + * define and init a srcu struct at build time. + * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it. + */ +#define DEFINE_SRCU(name) \ + static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ + struct srcu_struct name = __SRCU_STRUCT_INIT(name); + +#define DEFINE_STATIC_SRCU(name) \ + static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ + static struct srcu_struct name = __SRCU_STRUCT_INIT(name); + /** * call_srcu() - Queue a callback for invocation after an SRCU grace period * @sp: srcu_struct in queue the callback -- cgit v1.2.3 From bb08f76d8419a163b7a4212a0e4ea6bd25acacd1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 20 Oct 2012 12:33:37 -0700 Subject: rcu: Remove list_for_each_continue_rcu() The list_for_each_continue_rcu() macro is no longer used, so this commit removes it. The list_for_each_entry_continue_rcu() macro should be used instead. Signed-off-by: Paul E. McKenney --- Documentation/RCU/checklist.txt | 17 ++++++++--------- Documentation/RCU/whatisRCU.txt | 4 +--- include/linux/rculist.h | 17 ----------------- 3 files changed, 9 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index cdb20d41a44a..31ef8fe07f82 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -271,15 +271,14 @@ over a rather long period of time, but improvements are always welcome! The same cautions apply to call_rcu_bh() and call_rcu_sched(). 9. All RCU list-traversal primitives, which include - rcu_dereference(), list_for_each_entry_rcu(), - list_for_each_continue_rcu(), and list_for_each_safe_rcu(), - must be either within an RCU read-side critical section or - must be protected by appropriate update-side locks. RCU - read-side critical sections are delimited by rcu_read_lock() - and rcu_read_unlock(), or by similar primitives such as - rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case - the matching rcu_dereference() primitive must be used in order - to keep lockdep happy, in this case, rcu_dereference_bh(). + rcu_dereference(), list_for_each_entry_rcu(), and + list_for_each_safe_rcu(), must be either within an RCU read-side + critical section or must be protected by appropriate update-side + locks. RCU read-side critical sections are delimited by + rcu_read_lock() and rcu_read_unlock(), or by similar primitives + such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which + case the matching rcu_dereference() primitive must be used in + order to keep lockdep happy, in this case, rcu_dereference_bh(). The reason that it is permissible to use RCU list-traversal primitives when the update-side lock is held is that doing so diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index bf0f6de2aa00..9d30de00d730 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -789,9 +789,7 @@ RCU list traversal: list_for_each_entry_rcu hlist_for_each_entry_rcu hlist_nulls_for_each_entry_rcu - - list_for_each_continue_rcu (to be deprecated in favor of new - list_for_each_entry_continue_rcu) + list_for_each_entry_continue_rcu RCU pointer/list update: diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e0f0fab20415..c92dd28eaa6c 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -286,23 +286,6 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_continue_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * Iterate over an rcu-protected list, continuing after current point. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ - (pos) != (head); \ - (pos) = rcu_dereference_raw(list_next_rcu(pos))) - /** * list_for_each_entry_continue_rcu - continue iteration over list of given type * @pos: the type * to use as a loop cursor. -- cgit v1.2.3 From f0a0e6f282c72247e7c8ec17c68d528c1bb4d49e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 23 Oct 2012 13:47:01 -0700 Subject: rcu: Clarify memory-ordering properties of grace-period primitives This commit explicitly states the memory-ordering properties of the RCU grace-period primitives. Although these properties were in some sense implied by the fundmental property of RCU ("a grace period must wait for all pre-existing RCU read-side critical sections to complete"), stating it explicitly will be a great labor-saving device. Reported-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Oleg Nesterov --- include/linux/rcupdate.h | 25 +++++++++++++++++++++++++ kernel/rcutree.c | 29 +++++++++++++++++++++++++---- kernel/rcutree_plugin.h | 8 ++++++++ 3 files changed, 58 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7c968e4f929e..6256759fb81e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -90,6 +90,25 @@ extern void do_trace_rcu_torture_read(char *rcutorturename, * that started after call_rcu() was invoked. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. + * + * Note that all CPUs must agree that the grace period extended beyond + * all pre-existing RCU read-side critical section. On systems with more + * than one CPU, this means that when "func()" is invoked, each CPU is + * guaranteed to have executed a full memory barrier since the end of its + * last RCU read-side critical section whose beginning preceded the call + * to call_rcu(). It also means that each CPU executing an RCU read-side + * critical section that continues beyond the start of "func()" must have + * executed a memory barrier after the call_rcu() but before the beginning + * of that RCU read-side critical section. Note that these guarantees + * include CPUs that are offline, idle, or executing in user mode, as + * well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the + * resulting RCU callback function "func()", then both CPU A and CPU B are + * guaranteed to execute a full memory barrier during the time interval + * between the call to call_rcu() and the invocation of "func()" -- even + * if CPU A and CPU B are the same CPU (but again only if the system has + * more than one CPU). */ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); @@ -118,6 +137,9 @@ extern void call_rcu(struct rcu_head *head, * OR * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. * These may be nested. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. */ extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head)); @@ -137,6 +159,9 @@ extern void call_rcu_bh(struct rcu_head *head, * OR * anything that disables preemption. * These may be nested. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. */ extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e4c2192b47c8..15a2beec320f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -2228,10 +2228,28 @@ static inline int rcu_blocking_is_gp(void) * rcu_read_lock_sched(). * * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns. However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. + * non-threaded hardware-interrupt handlers, in progress on entry will + * have completed before this primitive returns. However, this does not + * guarantee that softirq handlers will have completed, since in some + * kernels, these handlers can run in process context, and can block. + * + * Note that this guarantee implies further memory-ordering guarantees. + * On systems with more than one CPU, when synchronize_sched() returns, + * each CPU is guaranteed to have executed a full memory barrier since the + * end of its last RCU-sched read-side critical section whose beginning + * preceded the call to synchronize_sched(). In addition, each CPU having + * an RCU read-side critical section that extends beyond the return from + * synchronize_sched() is guaranteed to have executed a full memory barrier + * after the beginning of synchronize_sched() and before the beginning of + * that RCU read-side critical section. Note that these guarantees include + * CPUs that are offline, idle, or executing in user mode, as well as CPUs + * that are executing in the kernel. + * + * Furthermore, if CPU A invoked synchronize_sched(), which returned + * to its caller on CPU B, then both CPU A and CPU B are guaranteed + * to have executed a full memory barrier during the execution of + * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but + * again only if the system has more than one CPU). * * This primitive provides the guarantees made by the (now removed) * synchronize_kernel() API. In contrast, synchronize_rcu() only @@ -2259,6 +2277,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched); * read-side critical sections have completed. RCU read-side critical * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), * and may be nested. + * + * See the description of synchronize_sched() for more detailed information + * on memory ordering guarantees. */ void synchronize_rcu_bh(void) { diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f92115488187..57e0ef8ed721 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -670,6 +670,9 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu); * concurrently with new RCU read-side critical sections that began while * synchronize_rcu() was waiting. RCU read-side critical sections are * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. + * + * See the description of synchronize_sched() for more detailed information + * on memory ordering guarantees. */ void synchronize_rcu(void) { @@ -875,6 +878,11 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); /** * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete. + * + * Note that this primitive does not necessarily wait for an RCU grace period + * to complete. For example, if there are no RCU callbacks queued anywhere + * in the system, then rcu_barrier() is within its rights to return + * immediately, without waiting for anything, much less an RCU grace period. */ void rcu_barrier(void) { -- cgit v1.2.3 From 91d1aa43d30505b0b825db8898ffc80a8eca96c7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 27 Nov 2012 19:33:25 +0100 Subject: context_tracking: New context tracking susbsystem Create a new subsystem that probes on kernel boundaries to keep track of the transitions between level contexts with two basic initial contexts: user or kernel. This is an abstraction of some RCU code that use such tracking to implement its userspace extended quiescent state. We need to pull this up from RCU into this new level of indirection because this tracking is also going to be used to implement an "on demand" generic virtual cputime accounting. A necessary step to shutdown the tick while still accounting the cputime. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Li Zhong Cc: Gilad Ben-Yossef Reviewed-by: Steven Rostedt [ paulmck: fix whitespace error and email address. ] Signed-off-by: Paul E. McKenney --- arch/Kconfig | 15 +++--- arch/x86/Kconfig | 2 +- arch/x86/include/asm/context_tracking.h | 31 ++++++++++++ arch/x86/include/asm/rcu.h | 32 ------------- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/ptrace.c | 8 ++-- arch/x86/kernel/signal.c | 5 +- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/fault.c | 2 +- include/linux/context_tracking.h | 18 +++++++ include/linux/rcupdate.h | 2 - init/Kconfig | 28 +++++------ kernel/Makefile | 1 + kernel/context_tracking.c | 83 +++++++++++++++++++++++++++++++++ kernel/rcutree.c | 64 +------------------------ kernel/sched/core.c | 11 +++-- 16 files changed, 174 insertions(+), 132 deletions(-) create mode 100644 arch/x86/include/asm/context_tracking.h delete mode 100644 arch/x86/include/asm/rcu.h create mode 100644 include/linux/context_tracking.h create mode 100644 kernel/context_tracking.c (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 366ec06a5185..cc74aaea116c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -300,15 +300,16 @@ config SECCOMP_FILTER See Documentation/prctl/seccomp_filter.txt for details. -config HAVE_RCU_USER_QS +config HAVE_CONTEXT_TRACKING bool help - Provide kernel entry/exit hooks necessary for userspace - RCU extended quiescent state. Syscalls need to be wrapped inside - rcu_user_exit()-rcu_user_enter() through the slow path using - TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs - are already protected inside rcu_irq_enter/rcu_irq_exit() but - preemption or signal handling on irq exit still need to be protected. + Provide kernel/user boundaries probes necessary for subsystems + that need it, such as userspace RCU extended quiescent state. + Syscalls need to be wrapped inside user_exit()-user_enter() through + the slow path using TIF_NOHZ flag. Exceptions handlers must be + wrapped as well. Irqs are already protected inside + rcu_irq_enter/rcu_irq_exit() but preemption or signal handling on + irq exit still need to be protected. config HAVE_VIRT_CPU_ACCOUNTING bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff3ced2..110cfad24f26 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -106,7 +106,7 @@ config X86 select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER - select HAVE_RCU_USER_QS if X86_64 + select HAVE_CONTEXT_TRACKING if X86_64 select HAVE_IRQ_TIME_ACCOUNTING select GENERIC_KERNEL_THREAD select GENERIC_KERNEL_EXECVE diff --git a/arch/x86/include/asm/context_tracking.h b/arch/x86/include/asm/context_tracking.h new file mode 100644 index 000000000000..1616562683e9 --- /dev/null +++ b/arch/x86/include/asm/context_tracking.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_CONTEXT_TRACKING_H +#define _ASM_X86_CONTEXT_TRACKING_H + +#ifndef __ASSEMBLY__ +#include +#include + +static inline void exception_enter(struct pt_regs *regs) +{ + user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_CONTEXT_TRACKING + if (user_mode(regs)) + user_enter(); +#endif +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CONTEXT_TRACKING +# define SCHEDULE_USER call schedule_user +#else +# define SCHEDULE_USER call schedule +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h deleted file mode 100644 index d1ac07a23979..000000000000 --- a/arch/x86/include/asm/rcu.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _ASM_X86_RCU_H -#define _ASM_X86_RCU_H - -#ifndef __ASSEMBLY__ - -#include -#include - -static inline void exception_enter(struct pt_regs *regs) -{ - rcu_user_exit(); -} - -static inline void exception_exit(struct pt_regs *regs) -{ -#ifdef CONFIG_RCU_USER_QS - if (user_mode(regs)) - rcu_user_enter(); -#endif -} - -#else /* __ASSEMBLY__ */ - -#ifdef CONFIG_RCU_USER_QS -# define SCHEDULE_USER call schedule_user -#else -# define SCHEDULE_USER call schedule -#endif - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0c58952d64e8..98faeb30139d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index eff5b8c68652..65b88a5dc1a8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -1461,7 +1461,7 @@ long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; - rcu_user_exit(); + user_exit(); /* * If we stepped into a sysenter/syscall insn, it trapped in @@ -1516,7 +1516,7 @@ void syscall_trace_leave(struct pt_regs *regs) * or do_notify_resume(), in which case we can be in RCU * user mode. */ - rcu_user_exit(); + user_exit(); audit_syscall_exit(regs); @@ -1534,5 +1534,5 @@ void syscall_trace_leave(struct pt_regs *regs) if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); - rcu_user_enter(); + user_enter(); } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 29ad351804e9..20ecac112e74 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -816,7 +817,7 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { - rcu_user_exit(); + user_exit(); #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ @@ -840,7 +841,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); - rcu_user_enter(); + user_enter(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8276dc6794cc..eb8586693e0b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 8e13ecb41bee..7a529cbab7ad 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,7 +18,7 @@ #include /* pgd_*(), ... */ #include /* kmemcheck_*(), ... */ #include /* VSYSCALL_START */ -#include /* exception_enter(), ... */ +#include /* exception_enter(), ... */ /* * Page fault error code bits: diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h new file mode 100644 index 000000000000..e24339ccb7f0 --- /dev/null +++ b/include/linux/context_tracking.h @@ -0,0 +1,18 @@ +#ifndef _LINUX_CONTEXT_TRACKING_H +#define _LINUX_CONTEXT_TRACKING_H + +#ifdef CONFIG_CONTEXT_TRACKING +#include + +extern void user_enter(void); +extern void user_exit(void); +extern void context_tracking_task_switch(struct task_struct *prev, + struct task_struct *next); +#else +static inline void user_enter(void) { } +static inline void user_exit(void) { } +static inline void context_tracking_task_switch(struct task_struct *prev, + struct task_struct *next) { } +#endif /* !CONFIG_CONTEXT_TRACKING */ + +#endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8fe7c1840d30..275aa3f1062d 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -222,8 +222,6 @@ extern void rcu_user_enter(void); extern void rcu_user_exit(void); extern void rcu_user_enter_after_irq(void); extern void rcu_user_exit_after_irq(void); -extern void rcu_user_hooks_switch(struct task_struct *prev, - struct task_struct *next); #else static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } diff --git a/init/Kconfig b/init/Kconfig index 5ac6ee094225..2054e048bb98 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -486,9 +486,13 @@ config PREEMPT_RCU This option enables preemptible-RCU code that is common between the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. +config CONTEXT_TRACKING + bool + config RCU_USER_QS bool "Consider userspace as in RCU extended quiescent state" - depends on HAVE_RCU_USER_QS && SMP + depends on HAVE_CONTEXT_TRACKING && SMP + select CONTEXT_TRACKING help This option sets hooks on kernel / userspace boundaries and puts RCU in extended quiescent state when the CPU runs in @@ -497,24 +501,20 @@ config RCU_USER_QS try to keep the timer tick on for RCU. Unless you want to hack and help the development of the full - tickless feature, you shouldn't enable this option. It also + dynticks mode, you shouldn't enable this option. It also adds unnecessary overhead. If unsure say N -config RCU_USER_QS_FORCE - bool "Force userspace extended QS by default" - depends on RCU_USER_QS +config CONTEXT_TRACKING_FORCE + bool "Force context tracking" + depends on CONTEXT_TRACKING help - Set the hooks in user/kernel boundaries by default in order to - test this feature that treats userspace as an extended quiescent - state until we have a real user like a full adaptive nohz option. - - Unless you want to hack and help the development of the full - tickless feature, you shouldn't enable this option. It adds - unnecessary overhead. - - If unsure say N + Probe on user/kernel boundaries by default in order to + test the features that rely on it such as userspace RCU extended + quiescent states. + This test is there for debugging until we have a real user like the + full dynticks mode. config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" diff --git a/kernel/Makefile b/kernel/Makefile index 0dfeca4324ee..f90bbfc9727f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o +obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o $(obj)/configs.o: $(obj)/config_data.h diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c new file mode 100644 index 000000000000..e0e07fd55508 --- /dev/null +++ b/kernel/context_tracking.c @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include + +struct context_tracking { + /* + * When active is false, hooks are not set to + * minimize overhead: TIF flags are cleared + * and calls to user_enter/exit are ignored. This + * may be further optimized using static keys. + */ + bool active; + enum { + IN_KERNEL = 0, + IN_USER, + } state; +}; + +static DEFINE_PER_CPU(struct context_tracking, context_tracking) = { +#ifdef CONFIG_CONTEXT_TRACKING_FORCE + .active = true, +#endif +}; + +void user_enter(void) +{ + unsigned long flags; + + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ + if (in_interrupt()) + return; + + WARN_ON_ONCE(!current->mm); + + local_irq_save(flags); + if (__this_cpu_read(context_tracking.active) && + __this_cpu_read(context_tracking.state) != IN_USER) { + __this_cpu_write(context_tracking.state, IN_USER); + rcu_user_enter(); + } + local_irq_restore(flags); +} + +void user_exit(void) +{ + unsigned long flags; + + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ + if (in_interrupt()) + return; + + local_irq_save(flags); + if (__this_cpu_read(context_tracking.state) == IN_USER) { + __this_cpu_write(context_tracking.state, IN_KERNEL); + rcu_user_exit(); + } + local_irq_restore(flags); +} + +void context_tracking_task_switch(struct task_struct *prev, + struct task_struct *next) +{ + if (__this_cpu_read(context_tracking.active)) { + clear_tsk_thread_flag(prev, TIF_NOHZ); + set_tsk_thread_flag(next, TIF_NOHZ); + } +} diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7733eb56e156..e441b77b614e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -207,9 +207,6 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), -#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE) - .ignore_user_qs = true, -#endif }; static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ @@ -420,29 +417,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); */ void rcu_user_enter(void) { - unsigned long flags; - struct rcu_dynticks *rdtp; - - /* - * Some contexts may involve an exception occuring in an irq, - * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() - * This would mess up the dyntick_nesting count though. And rcu_irq_*() - * helpers are enough to protect RCU uses inside the exception. So - * just return immediately if we detect we are in an IRQ. - */ - if (in_interrupt()) - return; - - WARN_ON_ONCE(!current->mm); - - local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); - if (!rdtp->ignore_user_qs && !rdtp->in_user) { - rdtp->in_user = true; - rcu_eqs_enter(true); - } - local_irq_restore(flags); + rcu_eqs_enter(1); } /** @@ -579,27 +554,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit); */ void rcu_user_exit(void) { - unsigned long flags; - struct rcu_dynticks *rdtp; - - /* - * Some contexts may involve an exception occuring in an irq, - * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() - * This would mess up the dyntick_nesting count though. And rcu_irq_*() - * helpers are enough to protect RCU uses inside the exception. So - * just return immediately if we detect we are in an IRQ. - */ - if (in_interrupt()) - return; - - local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); - if (rdtp->in_user) { - rdtp->in_user = false; - rcu_eqs_exit(true); - } - local_irq_restore(flags); + rcu_eqs_exit(1); } /** @@ -722,21 +677,6 @@ int rcu_is_cpu_idle(void) } EXPORT_SYMBOL(rcu_is_cpu_idle); -#ifdef CONFIG_RCU_USER_QS -void rcu_user_hooks_switch(struct task_struct *prev, - struct task_struct *next) -{ - struct rcu_dynticks *rdtp; - - /* Interrupts are disabled in context switch */ - rdtp = &__get_cpu_var(rcu_dynticks); - if (!rdtp->ignore_user_qs) { - clear_tsk_thread_flag(prev, TIF_NOHZ); - set_tsk_thread_flag(next, TIF_NOHZ); - } -} -#endif /* #ifdef CONFIG_RCU_USER_QS */ - #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 36f260864f65..80f80dfca70e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -1886,8 +1887,8 @@ context_switch(struct rq *rq, struct task_struct *prev, spin_release(&rq->lock.dep_map, 1, _THIS_IP_); #endif + context_tracking_task_switch(prev, next); /* Here we just switch the register state and the stack. */ - rcu_user_hooks_switch(prev, next); switch_to(prev, next, prev); barrier(); @@ -2911,7 +2912,7 @@ asmlinkage void __sched schedule(void) } EXPORT_SYMBOL(schedule); -#ifdef CONFIG_RCU_USER_QS +#ifdef CONFIG_CONTEXT_TRACKING asmlinkage void __sched schedule_user(void) { /* @@ -2920,9 +2921,9 @@ asmlinkage void __sched schedule_user(void) * we haven't yet exited the RCU idle mode. Do it here manually until * we find a better solution. */ - rcu_user_exit(); + user_exit(); schedule(); - rcu_user_enter(); + user_enter(); } #endif @@ -3027,7 +3028,7 @@ asmlinkage void __sched preempt_schedule_irq(void) /* Catch callers which need to be fixed */ BUG_ON(ti->preempt_count || !irqs_disabled()); - rcu_user_exit(); + user_exit(); do { add_preempt_count(PREEMPT_ACTIVE); local_irq_enable(); -- cgit v1.2.3