From c0a581d7126c0bbc96163276f585fd7b4e4d8d0e Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Thu, 22 Sep 2022 10:56:22 -0400
Subject: tracing: Disable interrupt or preemption before acquiring arch_spinlock_t

It was found that some tracing functions in kernel/trace/trace.c acquire
an arch_spinlock_t with preemption and irqs enabled. An example is the
tracing_saved_cmdlines_size_read() function which intermittently causes
a "BUG: using smp_processor_id() in preemptible" warning when the LTP
read_all_proc test is run. That can be problematic if preemption happens
after acquiring the lock.

Add the necessary preemption or interrupt disabling code in the
appropriate places before acquiring an arch_spinlock_t. The convention
here is to disable preemption for trace_cmdline_lock and interrupts for
max_lock.

Link: https://lkml.kernel.org/r/20220922145622.1744826-1-longman@redhat.com

Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Will Deacon
Cc: Boqun Feng
Cc: stable@vger.kernel.org
Fixes: a35873a0993b ("tracing: Add conditional snapshot")
Fixes: 939c7a4f04fc ("tracing: Introduce saved_cmdlines_size file")
Suggested-by: Steven Rostedt
Signed-off-by: Waiman Long
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d3005279165d..aed7ea6e6045 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1193,12 +1193,14 @@ void *tracing_cond_snapshot_data(struct trace_array *tr)
 {
         void *cond_data = NULL;
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
 
         if (tr->cond_snapshot)
                 cond_data = tr->cond_snapshot->cond_data;
 
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
 
         return cond_data;
 }
@@ -1334,9 +1336,11 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
                 goto fail_unlock;
         }
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
         tr->cond_snapshot = cond_snapshot;
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
 
         mutex_unlock(&trace_types_lock);
 
@@ -1363,6 +1367,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
 {
         int ret = 0;
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
 
         if (!tr->cond_snapshot)
@@ -1373,6 +1378,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
         }
 
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
 
         return ret;
 }
@@ -2200,6 +2206,11 @@ static size_t tgid_map_max;
 
 #define SAVED_CMDLINES_DEFAULT 128
 #define NO_CMDLINE_MAP UINT_MAX
+/*
+ * Preemption must be disabled before acquiring trace_cmdline_lock.
+ * The various trace_arrays' max_lock must be acquired in a context
+ * where interrupt is disabled.
+ */
 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 struct saved_cmdlines_buffer {
         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -2412,7 +2423,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
          * the lock, but we also don't want to spin
          * nor do we want to disable interrupts,
          * so if we miss here, then better luck next time.
+         *
+         * This is called within the scheduler and wake up, so interrupts
+         * had better been disabled and run queue lock been held.
          */
+        lockdep_assert_preemption_disabled();
         if (!arch_spin_trylock(&trace_cmdline_lock))
                 return 0;
 
@@ -5890,9 +5905,11 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
         char buf[64];
         int r;
 
+        preempt_disable();
         arch_spin_lock(&trace_cmdline_lock);
         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
         arch_spin_unlock(&trace_cmdline_lock);
+        preempt_enable();
 
         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
@@ -5917,10 +5934,12 @@ static int tracing_resize_saved_cmdlines(unsigned int val)
                 return -ENOMEM;
         }
 
+        preempt_disable();
         arch_spin_lock(&trace_cmdline_lock);
         savedcmd_temp = savedcmd;
         savedcmd = s;
         arch_spin_unlock(&trace_cmdline_lock);
+        preempt_enable();
         free_saved_cmdlines_buffer(savedcmd_temp);
 
         return 0;
@@ -6373,10 +6392,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
 
 #ifdef CONFIG_TRACER_SNAPSHOT
         if (t->use_max_tr) {
+                local_irq_disable();
                 arch_spin_lock(&tr->max_lock);
                 if (tr->cond_snapshot)
                         ret = -EBUSY;
                 arch_spin_unlock(&tr->max_lock);
+                local_irq_enable();
                 if (ret)
                         goto out;
         }
@@ -7436,10 +7457,12 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
                 goto out;
         }
 
+        local_irq_disable();
         arch_spin_lock(&tr->max_lock);
         if (tr->cond_snapshot)
                 ret = -EBUSY;
         arch_spin_unlock(&tr->max_lock);
+        local_irq_enable();
 
         if (ret)
                 goto out;
 
--
cgit v1.2.3


From f3ddb74ad0790030c9592229fb14d8c451f4e9a8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)"
Date: Tue, 27 Sep 2022 19:15:27 -0400
Subject: tracing: Wake up ring buffer waiters on closing of the file

When the file that represents the ring buffer is closed, there may be
waiters waiting on more input from the ring buffer. Call
ring_buffer_wake_waiters() to wake up any waiters when the file is
closed.

Link: https://lkml.kernel.org/r/20220927231825.182416969@goodmis.org

Cc: stable@vger.kernel.org
Cc: Ingo Molnar
Cc: Andrew Morton
Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice")
Signed-off-by: Steven Rostedt (Google)
---
 include/linux/trace_events.h |  1 +
 kernel/trace/trace.c         | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8401dec93c15..20749bd9db71 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -92,6 +92,7 @@ struct trace_iterator {
         unsigned int temp_size;
         char *fmt; /* modified format holder */
         unsigned int fmt_size;
+        long wait_index;
 
         /* trace_seq for __print_flags() and __print_symbolic() etc. */
         struct trace_seq tmp_seq;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index aed7ea6e6045..e101b0764b39 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8160,6 +8160,12 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
 
         __trace_array_put(iter->tr);
 
+        iter->wait_index++;
+        /* Make sure the waiters see the new wait_index */
+        smp_wmb();
+
+        ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
         if (info->spare)
                 ring_buffer_free_read_page(iter->array_buffer->buffer,
                                            info->spare_cpu, info->spare);
@@ -8313,6 +8319,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
         /* did we read anything? */
         if (!spd.nr_pages) {
+                long wait_index;
+
                 if (ret)
                         goto out;
 
@@ -8320,10 +8328,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
                         goto out;
 
+                wait_index = READ_ONCE(iter->wait_index);
+
                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
                 if (ret)
                         goto out;
 
+                /* Make sure we see the new wait_index */
+                smp_rmb();
+                if (wait_index != iter->wait_index)
+                        goto out;
+
                 goto again;
         }
 
--
cgit v1.2.3


From 01b2a52171735c6eea80ee2f355f32bea6c41418 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)"
Date: Thu, 29 Sep 2022 09:50:29 -0400
Subject: tracing: Add ioctl() to force ring buffer waiters to wake up

If a process is waiting on the ring buffer for data, there currently
isn't a clean way to force it to wake up. Add an ioctl call that will
force any tasks that are waiting on the trace_pipe_raw file to wake up.

Link: https://lkml.kernel.org/r/20220929095029.117f913f@gandalf.local.home

Cc: stable@vger.kernel.org
Cc: Ingo Molnar
Cc: Andrew Morton
Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice")
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e101b0764b39..58afc83afc9d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8349,12 +8349,34 @@ out:
         return ret;
 }
 
+/* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
+static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+        struct ftrace_buffer_info *info = file->private_data;
+        struct trace_iterator *iter = &info->iter;
+
+        if (cmd)
+                return -ENOIOCTLCMD;
+
+        mutex_lock(&trace_types_lock);
+
+        iter->wait_index++;
+        /* Make sure the waiters see the new wait_index */
+        smp_wmb();
+
+        ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
+
+        mutex_unlock(&trace_types_lock);
+        return 0;
+}
+
 static const struct file_operations tracing_buffers_fops = {
         .open = tracing_buffers_open,
         .read = tracing_buffers_read,
         .poll = tracing_buffers_poll,
         .release = tracing_buffers_release,
         .splice_read = tracing_buffers_splice_read,
+        .unlocked_ioctl = tracing_buffers_ioctl,
         .llseek = no_llseek,
 };

--
cgit v1.2.3


From 2b0fd9a59b7990c161fa1cb7b79edb22847c87c2 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)"
Date: Wed, 28 Sep 2022 18:22:20 -0400
Subject: tracing: Wake up waiters when tracing is disabled

When tracing is disabled, there is no reason for waiters to stay
waiting. Wake them up; otherwise tasks get stuck when they should be
flushing the buffers.

Cc: stable@vger.kernel.org
Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice")
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 58afc83afc9d..bb5597c6bfc1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8334,6 +8334,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                 if (ret)
                         goto out;
 
+                /* No need to wait after waking up when tracing is off */
+                if (!tracer_tracing_is_on(iter->tr))
+                        goto out;
+
                 /* Make sure we see the new wait_index */
                 smp_rmb();
                 if (wait_index != iter->wait_index)
@@ -9065,6 +9069,8 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
                         tracer_tracing_off(tr);
                         if (tr->current_trace->stop)
                                 tr->current_trace->stop(tr);
+                        /* Wake up any waiters */
+                        ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
                 }
                 mutex_unlock(&trace_types_lock);
         }

--
cgit v1.2.3


From e841e8bfac490957fed3157326b96342dc76798a Mon Sep 17 00:00:00 2001
From: Colin Ian King
Date: Wed, 28 Sep 2022 22:58:28 +0100
Subject: tracing: Fix spelling mistake "preapre" -> "prepare"

There is a spelling mistake in the trace text. Fix it.

Link: https://lkml.kernel.org/r/20220928215828.66325-1-colin.i.king@gmail.com

Signed-off-by: Colin Ian King
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bb5597c6bfc1..def721de68a0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -10157,7 +10157,7 @@ __init static int tracer_alloc_buffers(void)
          * buffer. The memory will be removed once the "instance" is removed.
          */
         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
-                                      "trace/RB:preapre", trace_rb_cpu_prepare,
+                                      "trace/RB:prepare", trace_rb_cpu_prepare,
                                       NULL);
         if (ret < 0)
                 goto out_free_cpumask;

--
cgit v1.2.3


From a541a9559bb0a8ecc434de01d3e4826c32e8bb53 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)"
Date: Wed, 5 Oct 2022 11:37:57 -0400
Subject: tracing: Do not free snapshot if tracer is on cmdline

The ftrace_boot_snapshot and alloc_snapshot cmdline options allocate the
snapshot buffer at boot up for use later. The ftrace_boot_snapshot option
in particular requires the snapshot to be allocated because it takes a
snapshot at the end of boot up, allowing the traces that happened during
boot to be seen so that they are not lost when user space takes over.

When a tracer is registered (started), there is a path that checks
whether it requires the snapshot buffer, and if it does not and the
buffer was already allocated, it will do a synchronization and free the
snapshot buffer. That synchronization is only required if the previous
tracer was using the snapshot buffer for "max latency" snapshots (like
the irqoff tracer and friends), as it needs to make sure all max
snapshots are complete before freeing.

But it does not make sense to free the snapshot if the previous tracer
was not using it and the snapshot was allocated by the cmdline
parameters. That basically takes away the point of allocating it in the
first place!

Note, the allocated snapshot worked fine for just trace events, but
fails when a tracer is enabled on the cmdline. On further investigation,
the problem goes back even further and does not require a tracer on the
cmdline to fail. Simply enable snapshots and then enable a tracer, and
it will remove the snapshot.

Link: https://lkml.kernel.org/r/20221005113757.041df7fe@gandalf.local.home

Cc: Masami Hiramatsu
Cc: Andrew Morton
Cc: stable@vger.kernel.org
Fixes: 45ad21ca5530 ("tracing: Have trace_array keep track if snapshot buffer is allocated")
Reported-by: Ross Zwisler
Tested-by: Ross Zwisler
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'kernel/trace/trace.c')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index def721de68a0..47a44b055a1d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6428,12 +6428,12 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
         if (tr->current_trace->reset)
                 tr->current_trace->reset(tr);
 
+#ifdef CONFIG_TRACER_MAX_TRACE
+        had_max_tr = tr->current_trace->use_max_tr;
+
         /* Current trace needs to be nop_trace before synchronize_rcu */
         tr->current_trace = &nop_trace;
 
-#ifdef CONFIG_TRACER_MAX_TRACE
-        had_max_tr = tr->allocated_snapshot;
-
         if (had_max_tr && !t->use_max_tr) {
                 /*
                  * We need to make sure that the update_max_tr sees that
@@ -6446,11 +6446,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
                 free_snapshot(tr);
         }
 
-        if (t->use_max_tr && !had_max_tr) {
+        if (t->use_max_tr && !tr->allocated_snapshot) {
                 ret = tracing_alloc_snapshot_instance(tr);
                 if (ret < 0)
                         goto out;
         }
+#else
+        tr->current_trace = &nop_trace;
 #endif
 
         if (t->init) {

--
cgit v1.2.3
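
Editor's note: taken together, the patches above give user space three ways to wake a task
that is blocked reading or splicing trace_pipe_raw: closing the file (tracing_buffers_release()),
writing 0 to tracing_on (rb_simple_write() now calls ring_buffer_wake_waiters() for all CPUs),
and the new cmd-0 ioctl. Below is a minimal userspace sketch of the ioctl path. It is an
illustration, not part of the patches: the tracefs mount point /sys/kernel/tracing and the
per_cpu/cpu0/trace_pipe_raw file are assumptions about the local setup, and error handling is
kept to a bare minimum.

/*
 * Sketch only: wake up any task blocked on a per-CPU trace_pipe_raw
 * reader by issuing the "cmd 0" ioctl added by tracing_buffers_ioctl().
 * Assumes tracefs is mounted at /sys/kernel/tracing and that CPU 0
 * exists; adjust the path for the instance/CPU of interest.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

int main(void)
{
        const char *path = "/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw";
        int fd = open(path, O_RDONLY | O_NONBLOCK);

        if (fd < 0) {
                perror("open trace_pipe_raw");
                return 1;
        }

        /* cmd 0 wakes all waiters; any non-zero cmd is rejected by the kernel */
        if (ioctl(fd, 0, 0) < 0) {
                perror("ioctl");
                close(fd);
                return 1;
        }

        close(fd);
        return 0;
}

On kernels without the ioctl patch the call simply fails (no unlocked_ioctl handler is
installed), so a fallback such as toggling tracing_on or closing the blocked file descriptor
is still needed there.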