From 119a5d573622ae90ba730d18acfae9bb75d77b9a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 30 Jun 2025 18:04:40 -0400 Subject: ring-buffer: Remove ring_buffer_read_prepare_sync() When the ring buffer was first introduced, reading the non-consuming "trace" file required disabling the writing of the ring buffer. To make sure the writing was fully disabled before iterating the buffer with a non-consuming read, it would set the disable flag of the buffer and then call an RCU synchronization to make sure all the buffers were synchronized. The function ring_buffer_read_start() originally would initialize the iterator and call an RCU synchronization, but this was for each individual per CPU buffer where this would get called many times on a machine with many CPUs before the trace file could be read. The commit 72c9ddfd4c5bf ("ring-buffer: Make non-consuming read less expensive with lots of cpus.") separated ring_buffer_read_start into ring_buffer_read_prepare(), ring_buffer_read_sync() and then ring_buffer_read_start() to allow each of the per CPU buffers to be prepared, call the read_buffer_read_sync() once, and then the ring_buffer_read_start() for each of the CPUs which made things much faster. The commit 1039221cc278 ("ring-buffer: Do not disable recording when there is an iterator") removed the requirement of disabling the recording of the ring buffer in order to iterate it, but it did not remove the synchronization that was happening that was required to wait for all the buffers to have no more writers. It's now OK for the buffers to have writers and no synchronization is needed. Remove the synchronization and put back the interface for the ring buffer iterator back before commit 72c9ddfd4c5bf was applied. Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250630180440.3eabb514@batman.local.home Reported-by: David Howells Fixes: 1039221cc278 ("ring-buffer: Do not disable recording when there is an iterator") Tested-by: David Howells Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index cd7f0ae26615..bc90c3c7b5fd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -152,9 +152,7 @@ ring_buffer_consume(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); struct ring_buffer_iter * -ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags); -void ring_buffer_read_prepare_sync(void); -void ring_buffer_read_start(struct ring_buffer_iter *iter); +ring_buffer_read_start(struct trace_buffer *buffer, int cpu, gfp_t flags); void ring_buffer_read_finish(struct ring_buffer_iter *iter); struct ring_buffer_event * -- cgit v1.2.3 From 788fa4b47cdcd9b3d8c2d02ac0b3cd2540305f18 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 1 Aug 2025 16:37:24 -0400 Subject: tracing: Add guard(ring_buffer_nest) Some calls to the tracing ring buffer can happen when the ring buffer is already being written to by the same context (for example, a trace_printk() in between a ring_buffer_lock_reserve() and a ring_buffer_unlock_commit()). In order to not trigger the recursion detection, these functions use ring_buffer_nest_start() and ring_buffer_nest_end(). Create a guard() for these functions so that their use cases can be simplified and not need to use goto for the release. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://lore.kernel.org/20250801203857.710501021@kernel.org Signed-off-by: Steven Rostedt (Google) --- include/linux/ring_buffer.h | 3 ++ kernel/trace/trace.c | 69 ++++++++++++++++----------------------- kernel/trace/trace_events_synth.c | 6 ++-- 3 files changed, 34 insertions(+), 44 deletions(-) (limited to 'include/linux/ring_buffer.h') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index cd7f0ae26615..8253cb69540c 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -144,6 +144,9 @@ int ring_buffer_write(struct trace_buffer *buffer, void ring_buffer_nest_start(struct trace_buffer *buffer); void ring_buffer_nest_end(struct trace_buffer *buffer); +DEFINE_GUARD(ring_buffer_nest, struct trace_buffer *, + ring_buffer_nest_start(_T), ring_buffer_nest_end(_T)) + struct ring_buffer_event * ring_buffer_peek(struct trace_buffer *buffer, int cpu, u64 *ts, unsigned long *lost_events); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0ec9cab9a812..332487179e1d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1160,13 +1160,11 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, trace_ctx); - if (!event) { - size = 0; - goto out; - } + if (!event) + return 0; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -1182,8 +1180,6 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - out: - ring_buffer_nest_end(buffer); return size; } EXPORT_SYMBOL_GPL(__trace_array_puts); @@ -1213,7 +1209,6 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned int trace_ctx; int size = sizeof(struct bputs_entry); - int ret = 0; if (!printk_binsafe(tr)) return __trace_puts(ip, str, strlen(str)); @@ -1227,11 +1222,11 @@ int __trace_bputs(unsigned long ip, const char *str) trace_ctx = tracing_gen_ctx(); buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, trace_ctx); if (!event) - goto out; + return 0; entry = ring_buffer_event_data(event); entry->ip = ip; @@ -1240,10 +1235,7 @@ int __trace_bputs(unsigned long ip, const char *str) __buffer_unlock_commit(buffer, event); ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - ret = 1; - out: - ring_buffer_nest_end(buffer); - return ret; + return 1; } EXPORT_SYMBOL_GPL(__trace_bputs); @@ -3397,21 +3389,19 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - trace_ctx); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; - - memcpy(entry->buf, tbuffer, sizeof(u32) * len); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, + trace_ctx); + if (!event) + goto out_put; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; -out: - ring_buffer_nest_end(buffer); + memcpy(entry->buf, tbuffer, sizeof(u32) * len); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + } out_put: put_trace_buf(); @@ -3452,20 +3442,19 @@ int __trace_array_vprintk(struct trace_buffer *buffer, len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); size = sizeof(*entry) + len + 1; - ring_buffer_nest_start(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - trace_ctx); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - - memcpy(&entry->buf, tbuffer, len + 1); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + entry->ip = ip; + memcpy(&entry->buf, tbuffer, len + 1); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); + } out: - ring_buffer_nest_end(buffer); put_trace_buf(); out_nobuffer: diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 33cfbd4ed76d..f24ee61f8884 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -536,12 +536,12 @@ static notrace void trace_event_raw_event_synth(void *__data, * is being performed within another event. */ buffer = trace_file->tr->array_buffer.buffer; - ring_buffer_nest_start(buffer); + guard(ring_buffer_nest)(buffer); entry = trace_event_buffer_reserve(&fbuffer, trace_file, sizeof(*entry) + fields_size); if (!entry) - goto out; + return; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { val_idx = var_ref_idx[i]; @@ -584,8 +584,6 @@ static notrace void trace_event_raw_event_synth(void *__data, } trace_event_buffer_commit(&fbuffer); -out: - ring_buffer_nest_end(buffer); } static void free_synth_event_print_fmt(struct trace_event_call *call) -- cgit v1.2.3