From 252babcd52aabe37aaad03685e7d6ad454edb9f9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 6 Apr 2017 12:11:36 -0400 Subject: tracing: Replace the per_cpu() with __this_cpu*() in trace_stack.c The updates to the trace_active per cpu variable can be updated with the __this_cpu_*() functions as it only gets updated on the CPU that the variable is on. Thanks to Paul McKenney for suggesting __this_cpu_* instead of this_cpu_*. Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_stack.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'kernel/trace/trace_stack.c') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 5fb1f2c87e6b..338d076a06da 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -207,13 +207,12 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) { unsigned long stack; - int cpu; preempt_disable_notrace(); - cpu = raw_smp_processor_id(); /* no atomic needed, we only modify this variable by this cpu */ - if (per_cpu(trace_active, cpu)++ != 0) + __this_cpu_inc(trace_active); + if (__this_cpu_read(trace_active) != 1) goto out; ip += MCOUNT_INSN_SIZE; @@ -221,7 +220,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, check_stack(ip, &stack); out: - per_cpu(trace_active, cpu)--; + __this_cpu_dec(trace_active); /* prevent recursion in schedule */ preempt_enable_notrace(); } @@ -253,7 +252,6 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, long *ptr = filp->private_data; unsigned long val, flags; int ret; - int cpu; ret = kstrtoul_from_user(ubuf, count, 10, &val); if (ret) @@ -266,14 +264,13 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, * we will cause circular lock, so we also need to increase * the percpu trace_active here. */ - cpu = smp_processor_id(); - per_cpu(trace_active, cpu)++; + __this_cpu_inc(trace_active); arch_spin_lock(&stack_trace_max_lock); *ptr = val; arch_spin_unlock(&stack_trace_max_lock); - per_cpu(trace_active, cpu)--; + __this_cpu_dec(trace_active); local_irq_restore(flags); return count; @@ -307,12 +304,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - int cpu; - local_irq_disable(); - cpu = smp_processor_id(); - per_cpu(trace_active, cpu)++; + __this_cpu_inc(trace_active); arch_spin_lock(&stack_trace_max_lock); @@ -324,12 +318,9 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void t_stop(struct seq_file *m, void *p) { - int cpu; - arch_spin_unlock(&stack_trace_max_lock); - cpu = smp_processor_id(); - per_cpu(trace_active, cpu)--; + __this_cpu_dec(trace_active); local_irq_enable(); } -- cgit v1.2.3 From 5367278cb7ba74537bcad1470d75f30d95b09c14 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 6 Apr 2017 12:26:20 -0400 Subject: tracing: Add stack_tracer_disable/enable() functions There are certain parts of the kernel that cannot let stack tracing proceed (namely in RCU), because the stack tracer uses RCU, and parts of RCU internals cannot handle having RCU read side locks taken. Add stack_tracer_disable() and stack_tracer_enable() functions to let RCU stop stack tracing on the current CPU when it is in those critical sections. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 ++++++ kernel/trace/trace_stack.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'kernel/trace/trace_stack.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ef7123219f14..7b4e6572ab21 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -286,6 +286,12 @@ int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); + +void stack_tracer_disable(void); +void stack_tracer_enable(void); +#else +static inline void stack_tracer_disable(void) { } +static inline void stack_tracer_enable(void) { } #endif struct ftrace_func_command { diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 338d076a06da..21e536cf66e4 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -41,6 +41,38 @@ static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static int last_stack_tracer_enabled; +/** + * stack_tracer_disable - temporarily disable the stack tracer + * + * There's a few locations (namely in RCU) where stack tracing + * cannot be executed. This function is used to disable stack + * tracing during those critical sections. + * + * This function must be called with preemption or interrupts + * disabled and stack_tracer_enable() must be called shortly after + * while preemption or interrupts are still disabled. + */ +void stack_tracer_disable(void) +{ + /* Preemption or interupts must be disabled */ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_inc(trace_active); +} + +/** + * stack_tracer_enable - re-enable the stack tracer + * + * After stack_tracer_disable() is called, stack_tracer_enable() + * must be called shortly afterward. + */ +void stack_tracer_enable(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_dec(trace_active); +} + void stack_trace_print(void) { long i; -- cgit v1.2.3 From 8aaf1ee70e19ac74cbbb81098edfa328d1ab4bd7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 6 Apr 2017 15:47:32 -0400 Subject: tracing: Rename trace_active to disable_stack_tracer and inline its modification In order to eliminate a function call, make "trace_active" into "disable_stack_tracer" and convert stack_tracer_disable() and friends into static inline functions. Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 36 +++++++++++++++++++++++++++++++-- kernel/trace/trace_stack.c | 50 +++++++++------------------------------------- 2 files changed, 43 insertions(+), 43 deletions(-) (limited to 'kernel/trace/trace_stack.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7b4e6572ab21..06b2990a35e4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -287,8 +287,40 @@ stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -void stack_tracer_disable(void); -void stack_tracer_enable(void); +/* DO NOT MODIFY THIS VARIABLE DIRECTLY! */ +DECLARE_PER_CPU(int, disable_stack_tracer); + +/** + * stack_tracer_disable - temporarily disable the stack tracer + * + * There's a few locations (namely in RCU) where stack tracing + * cannot be executed. This function is used to disable stack + * tracing during those critical sections. + * + * This function must be called with preemption or interrupts + * disabled and stack_tracer_enable() must be called shortly after + * while preemption or interrupts are still disabled. + */ +static inline void stack_tracer_disable(void) +{ + /* Preemption or interupts must be disabled */ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_inc(disable_stack_tracer); +} + +/** + * stack_tracer_enable - re-enable the stack tracer + * + * After stack_tracer_disable() is called, stack_tracer_enable() + * must be called shortly afterward. + */ +static inline void stack_tracer_enable(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_dec(disable_stack_tracer); +} #else static inline void stack_tracer_disable(void) { } static inline void stack_tracer_enable(void) { } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 21e536cf66e4..f2f02ff350d4 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -35,44 +35,12 @@ unsigned long stack_trace_max_size; arch_spinlock_t stack_trace_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; -static DEFINE_PER_CPU(int, trace_active); +DEFINE_PER_CPU(int, disable_stack_tracer); static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static int last_stack_tracer_enabled; -/** - * stack_tracer_disable - temporarily disable the stack tracer - * - * There's a few locations (namely in RCU) where stack tracing - * cannot be executed. This function is used to disable stack - * tracing during those critical sections. - * - * This function must be called with preemption or interrupts - * disabled and stack_tracer_enable() must be called shortly after - * while preemption or interrupts are still disabled. - */ -void stack_tracer_disable(void) -{ - /* Preemption or interupts must be disabled */ - if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) - WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); - this_cpu_inc(trace_active); -} - -/** - * stack_tracer_enable - re-enable the stack tracer - * - * After stack_tracer_disable() is called, stack_tracer_enable() - * must be called shortly afterward. - */ -void stack_tracer_enable(void) -{ - if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) - WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); - this_cpu_dec(trace_active); -} - void stack_trace_print(void) { long i; @@ -243,8 +211,8 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); /* no atomic needed, we only modify this variable by this cpu */ - __this_cpu_inc(trace_active); - if (__this_cpu_read(trace_active) != 1) + __this_cpu_inc(disable_stack_tracer); + if (__this_cpu_read(disable_stack_tracer) != 1) goto out; ip += MCOUNT_INSN_SIZE; @@ -252,7 +220,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip, check_stack(ip, &stack); out: - __this_cpu_dec(trace_active); + __this_cpu_dec(disable_stack_tracer); /* prevent recursion in schedule */ preempt_enable_notrace(); } @@ -294,15 +262,15 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, /* * In case we trace inside arch_spin_lock() or after (NMI), * we will cause circular lock, so we also need to increase - * the percpu trace_active here. + * the percpu disable_stack_tracer here. */ - __this_cpu_inc(trace_active); + __this_cpu_inc(disable_stack_tracer); arch_spin_lock(&stack_trace_max_lock); *ptr = val; arch_spin_unlock(&stack_trace_max_lock); - __this_cpu_dec(trace_active); + __this_cpu_dec(disable_stack_tracer); local_irq_restore(flags); return count; @@ -338,7 +306,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) { local_irq_disable(); - __this_cpu_inc(trace_active); + __this_cpu_inc(disable_stack_tracer); arch_spin_lock(&stack_trace_max_lock); @@ -352,7 +320,7 @@ static void t_stop(struct seq_file *m, void *p) { arch_spin_unlock(&stack_trace_max_lock); - __this_cpu_dec(trace_active); + __this_cpu_dec(disable_stack_tracer); local_irq_enable(); } -- cgit v1.2.3 From 03ecd3f48e57f2e6154584e0ee7450d7a05e2d3b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 7 Apr 2017 12:20:36 -0400 Subject: rcu/tracing: Add rcu_disabled to denote when rcu_irq_enter() will not work Tracing uses rcu_irq_enter() as a way to make sure that RCU is watching when it needs to use rcu_read_lock() and friends. This is because tracing can happen as RCU is about to enter user space, or about to go idle, and RCU does not watch for RCU read side critical sections as it makes the transition. There is a small location within the RCU infrastructure that rcu_irq_enter() itself will not work. If tracing were to occur in that section it will break if it tries to use rcu_irq_enter(). Originally, this happens with the stack_tracer, because it will call save_stack_trace when it encounters stack usage that is greater than any stack usage it had encountered previously. There was a case where that happened in the RCU section where rcu_irq_enter() did not work, and lockdep complained loudly about it. To fix it, stack tracing added a call to be disabled and RCU would disable stack tracing during the critical section that rcu_irq_enter() was inoperable. This solution worked, but there are other cases that use rcu_irq_enter() and it would be a good idea to let RCU give a way to let others know that rcu_irq_enter() will not work. For example, in trace events. Another helpful aspect of this change is that it also moves the per cpu variable called in the RCU critical section into a cache locale along with other RCU per cpu variables used in that same location. I'm keeping the stack_trace_disable() code, as that still could be used in the future by places that really need to disable it. And since it's only a static inline, it wont take up any kernel text if it is not used. Link: http://lkml.kernel.org/r/20170405093207.404f8deb@gandalf.local.home Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- include/linux/rcupdate.h | 5 +++++ kernel/rcu/tree.c | 18 ++++++++++++++++-- kernel/trace/trace_stack.c | 8 ++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'kernel/trace/trace_stack.c') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de88b33c0974..dea8f17b2fe3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -97,6 +97,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, unsigned long secs, unsigned long c_old, unsigned long c); +bool rcu_irq_enter_disabled(void); #else static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, @@ -113,6 +114,10 @@ static inline void rcutorture_record_test_transition(void) static inline void rcutorture_record_progress(unsigned long vernum) { } +static inline bool rcu_irq_enter_disabled(void) +{ + return false; +} #ifdef CONFIG_RCU_TRACE void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8b4d273331e4..a6dcf3bd244f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -284,6 +284,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { #endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ }; +/* + * There's a few places, currently just in the tracing infrastructure, + * that uses rcu_irq_enter() to make sure RCU is watching. But there's + * a small location where that will not even work. In those cases + * rcu_irq_enter_disabled() needs to be checked to make sure rcu_irq_enter() + * can be called. + */ +static DEFINE_PER_CPU(bool, disable_rcu_irq_enter); + +bool rcu_irq_enter_disabled(void) +{ + return this_cpu_read(disable_rcu_irq_enter); +} + /* * Record entry into an extended quiescent state. This is only to be * called when not already in an extended quiescent state. @@ -800,10 +814,10 @@ static void rcu_eqs_enter_common(bool user) do_nocb_deferred_wakeup(rdp); } rcu_prepare_for_idle(); - stack_tracer_disable(); + __this_cpu_inc(disable_rcu_irq_enter); rdtp->dynticks_nesting = 0; /* Breaks tracing momentarily. */ rcu_dynticks_eqs_enter(); /* After this, tracing works again. */ - stack_tracer_enable(); + __this_cpu_dec(disable_rcu_irq_enter); rcu_dynticks_task_enter(); /* diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index f2f02ff350d4..76aa04d4c925 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -96,6 +96,14 @@ check_stack(unsigned long ip, unsigned long *stack) if (in_nmi()) return; + /* + * There's a slight chance that we are tracing inside the + * RCU infrastructure, and rcu_irq_enter() will not work + * as expected. + */ + if (unlikely(rcu_irq_enter_disabled())) + return; + local_irq_save(flags); arch_spin_lock(&stack_trace_max_lock); -- cgit v1.2.3