From 0550069cc25f513ce1f109c88f7c1f01d63297db Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 11 Dec 2025 14:00:58 +0400 Subject: tracing: Properly process error handling in event_hist_trigger_parse() Memory allocated with trigger_data_alloc() requires trigger_data_free() for proper cleanup. Replace kfree() with trigger_data_free() to fix this. Found via static analysis and code review. This isn't a real bug due to the current code basically being an open coded version of trigger_data_free() without the synchronization. The synchronization isn't needed as this is the error path of creation and there's nothing to synchronize against yet. Replace the kfree() to be consistent with the allocation. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Tom Zanussi Link: https://patch.msgid.link/20251211100058.2381268-1-linmq006@gmail.com Fixes: e1f187d09e11 ("tracing: Have existing event_command.parse() implementations use helpers") Signed-off-by: Miaoqian Lin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index c97bb2fda5c0..7e50df8b800b 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6911,7 +6911,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, remove_hist_vars(hist_data); - kfree(trigger_data); + trigger_data_free(trigger_data); destroy_hist_data(hist_data); goto out; -- cgit v1.2.3 From a4e0ea0e10a262fb6f1ad55a8a9d203bad776678 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 8 Jan 2026 17:44:29 -0500 Subject: tracing: Remove redundant call to event_trigger_reset_filter() in event_hist_trigger_parse() With the change to replace kfree() with trigger_data_free(), which starts out doing the exact same thing as event_trigger_reset_filter(), there's no reason to call event_trigger_reset_filter() before calling trigger_data_free(). Remove the call to it. Link: https://lore.kernel.org/linux-trace-kernel/20251211204520.0f3ba6d1@fedora/ Cc: Mathieu Desnoyers Cc: Miaoqian Lin Link: https://patch.msgid.link/20260108174429.2d9ca51f@gandalf.local.home Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 7e50df8b800b..0908a9f7e289 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6907,8 +6907,6 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, out_unreg: event_trigger_unregister(cmd_ops, file, glob+1, trigger_data); out_free: - event_trigger_reset_filter(cmd_ops, trigger_data); - remove_hist_vars(hist_data); trigger_data_free(trigger_data); -- cgit v1.2.3 From 2cddfc2e8fc78c13b0f5286ea5dd48cdf527ad41 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Fri, 26 Dec 2025 11:07:24 -0500 Subject: tracing: Add bitmask-list option for human-readable bitmask display Add support for displaying bitmasks in human-readable list format (e.g., 0,2-5,7) in addition to the default hexadecimal bitmap representation. This is particularly useful when tracing CPU masks and other large bitmasks where individual bit positions are more meaningful than their hexadecimal encoding. 
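For reference on the two representations (an illustrative sketch, not taken from the patch; show_mask_formats() is a made-up helper): a mask with bits 0, 2-5 and 7 set prints as "bd" in the default hexadecimal form and as "0,2-5,7" in list form, both via the kernel's bitmap printk extensions:

    /* Illustrative only: print the same mask both ways. */
    static void show_mask_formats(const unsigned long *mask, int nbits)
    {
            pr_info("hex:  %*pb\n", nbits, mask);   /* e.g. "bd"      */
            pr_info("list: %*pbl\n", nbits, mask);  /* e.g. "0,2-5,7" */
    }
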
When the "bitmask-list" option is enabled, the printk "%*pbl" format specifier is used to render bitmasks as comma-separated ranges, making trace output easier to interpret for complex CPU configurations and large bitmask values. Link: https://patch.msgid.link/20251226160724.2246493-2-atomlin@atomlin.com Signed-off-by: Aaron Tomlin Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/ftrace.rst | 9 +++++++++ include/linux/trace_events.h | 8 ++++---- include/linux/trace_seq.h | 12 +++++++++++- include/trace/stages/stage3_trace_output.h | 4 ++-- kernel/trace/trace.h | 1 + kernel/trace/trace_output.c | 30 +++++++++++++++++++++++++++--- kernel/trace/trace_seq.c | 29 ++++++++++++++++++++++++++++- 7 files changed, 82 insertions(+), 11 deletions(-) (limited to 'kernel/trace') diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index d1f313a5f4ad..639f4d95732f 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -1290,6 +1290,15 @@ Here are the available options: This will be useful if you want to find out which hashed value is corresponding to the real value in trace log. + bitmask-list + When enabled, bitmasks are displayed as a human-readable list of + ranges (e.g., 0,2-5,7) using the printk "%*pbl" format specifier. + When disabled (the default), bitmasks are displayed in the + traditional hexadecimal bitmap representation. The list format is + particularly useful for tracing CPU masks and other large bitmasks + where individual bit positions are more meaningful than their + hexadecimal encoding. + record-cmd When any event or tracer is enabled, a hook is enabled in the sched_switch trace point to fill comm cache diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 3690221ba3d8..0a2b8229b999 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -38,7 +38,10 @@ const char *trace_print_symbols_seq_u64(struct trace_seq *p, *symbol_array); #endif -const char *trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, +struct trace_iterator; +struct trace_event; + +const char *trace_print_bitmask_seq(struct trace_iterator *iter, void *bitmask_ptr, unsigned int bitmask_size); const char *trace_print_hex_seq(struct trace_seq *p, @@ -54,9 +57,6 @@ trace_print_hex_dump_seq(struct trace_seq *p, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); -struct trace_iterator; -struct trace_event; - int trace_raw_output_prep(struct trace_iterator *iter, struct trace_event *event); extern __printf(2, 3) diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 4a0b8c172d27..697d619aafdc 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -114,7 +114,11 @@ extern void trace_seq_putmem_hex(struct trace_seq *s, const void *mem, extern int trace_seq_path(struct trace_seq *s, const struct path *path); extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, - int nmaskbits); + int nmaskbits); + +extern void trace_seq_bitmask_list(struct trace_seq *s, + const unsigned long *maskp, + int nmaskbits); extern int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, @@ -137,6 +141,12 @@ trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, { } +static inline void +trace_seq_bitmask_list(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ +} + static inline int trace_print_seq(struct seq_file *m, struct 
trace_seq *s) { return 0; diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h index 1e7b0bef95f5..fce85ea2df1c 100644 --- a/include/trace/stages/stage3_trace_output.h +++ b/include/trace/stages/stage3_trace_output.h @@ -39,7 +39,7 @@ void *__bitmask = __get_dynamic_array(field); \ unsigned int __bitmask_size; \ __bitmask_size = __get_dynamic_array_len(field); \ - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + trace_print_bitmask_seq(iter, __bitmask, __bitmask_size); \ }) #undef __get_cpumask @@ -51,7 +51,7 @@ void *__bitmask = __get_rel_dynamic_array(field); \ unsigned int __bitmask_size; \ __bitmask_size = __get_rel_dynamic_array_len(field); \ - trace_print_bitmask_seq(p, __bitmask, __bitmask_size); \ + trace_print_bitmask_seq(iter, __bitmask, __bitmask_size); \ }) #undef __get_rel_cpumask diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b6d42fe06115..8888fc9335b6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1411,6 +1411,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(COPY_MARKER, "copy_trace_marker"), \ C(PAUSE_ON_TRACE, "pause-on-trace"), \ C(HASH_PTR, "hash-ptr"), /* Print hashed pointer */ \ + C(BITMASK_LIST, "bitmask-list"), \ FUNCTION_FLAGS \ FGRAPH_FLAGS \ STACK_FLAGS \ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cc2d3306bb60..1996d7aba038 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -194,13 +194,37 @@ trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, EXPORT_SYMBOL(trace_print_symbols_seq_u64); #endif +/** + * trace_print_bitmask_seq - print a bitmask to a sequence buffer + * @iter: The trace iterator for the current event instance + * @bitmask_ptr: The pointer to the bitmask data + * @bitmask_size: The size of the bitmask in bytes + * + * Prints a bitmask into a sequence buffer as either a hex string or a + * human-readable range list, depending on the instance's "bitmask-list" + * trace option. The bitmask is formatted into the iterator's temporary + * scratchpad rather than the primary sequence buffer. This avoids + * duplication and pointer-collision issues when the returned string is + * processed by a "%s" specifier in a TP_printk() macro. + * + * Returns a pointer to the formatted string within the temporary buffer. + */ const char * -trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, +trace_print_bitmask_seq(struct trace_iterator *iter, void *bitmask_ptr, unsigned int bitmask_size) { - const char *ret = trace_seq_buffer_ptr(p); + struct trace_seq *p = &iter->tmp_seq; + const struct trace_array *tr = iter->tr; + const char *ret; + + trace_seq_init(p); + ret = trace_seq_buffer_ptr(p); + + if (tr->trace_flags & TRACE_ITER(BITMASK_LIST)) + trace_seq_bitmask_list(p, bitmask_ptr, bitmask_size * 8); + else + trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); - trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8); trace_seq_putc(p, 0); return ret; diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 32684ef4fb9d..85f6f10d107f 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -106,7 +106,7 @@ EXPORT_SYMBOL_GPL(trace_seq_printf); * Writes a ASCII representation of a bitmask string into @s. 
*/ void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, - int nmaskbits) + int nmaskbits) { unsigned int save_len = s->seq.len; @@ -124,6 +124,33 @@ void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, } EXPORT_SYMBOL_GPL(trace_seq_bitmask); +/** + * trace_seq_bitmask_list - write a bitmask array in its list representation + * @s: trace sequence descriptor + * @maskp: points to an array of unsigned longs that represent a bitmask + * @nmaskbits: The number of bits that are valid in @maskp + * + * Writes a list representation (e.g., 0-3,5-7) of a bitmask string into @s. + */ +void trace_seq_bitmask_list(struct trace_seq *s, const unsigned long *maskp, + int nmaskbits) +{ + unsigned int save_len = s->seq.len; + + if (s->full) + return; + + __trace_seq_init(s); + + seq_buf_printf(&s->seq, "%*pbl", nmaskbits, maskp); + + if (unlikely(seq_buf_has_overflowed(&s->seq))) { + s->seq.len = save_len; + s->full = 1; + } +} +EXPORT_SYMBOL_GPL(trace_seq_bitmask_list); + /** * trace_seq_vprintf - sequence printing of trace information * @s: trace sequence descriptor -- cgit v1.2.3 From e5136678b1c861ed7d0c985c1acdecd37f949937 Mon Sep 17 00:00:00 2001 From: Marco Crivellari Date: Tue, 30 Dec 2025 15:28:20 +0100 Subject: tracing: Replace use of system_wq with system_dfl_wq This patch continues the effort to refactor workqueue APIs, which has begun with the changes introducing new workqueues and a new alloc_workqueue flag: commit 128ea9f6ccfb ("workqueue: Add system_percpu_wq and system_dfl_wq") commit 930c2ea566af ("workqueue: Add new WQ_PERCPU flag") The point of the refactoring is to eventually alter the default behavior of workqueues to become unbound by default so that their workload placement is optimized by the scheduler. Before that to happen after a careful review and conversion of each individual case, workqueue users must be converted to the better named new workqueues with no intended behaviour changes: system_wq -> system_percpu_wq system_unbound_wq -> system_dfl_wq This specific workflow has no benefits being per-cpu, so instead of system_percpu_wq the new unbound workqueue has been used (system_dfl_wq). This way the old obsolete workqueues (system_wq, system_unbound_wq) can be removed in the future. 
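For callers, the conversion is mechanical; a sketch of the rule (my_percpu_work and my_unbound_work are placeholder names):

    /* work that genuinely must run on the queueing CPU */
    queue_work(system_percpu_wq, &my_percpu_work);

    /* no per-CPU requirement: let the scheduler place it */
    queue_work(system_dfl_wq, &my_unbound_work);
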
Cc: Lai Jiangshan Cc: Frederic Weisbecker Cc: Sebastian Andrzej Siewior Cc: Michal Hocko Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20251230142820.173712-1-marco.crivellari@suse.com Suggested-by: Tejun Heo Signed-off-by: Marco Crivellari Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 385af8405392..7001e34476ee 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1375,7 +1375,7 @@ static void free_filter_list_tasks(struct rcu_head *rhp) struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu); INIT_RCU_WORK(&filter_list->rwork, free_filter_list_work); - queue_rcu_work(system_wq, &filter_list->rwork); + queue_rcu_work(system_dfl_wq, &filter_list->rwork); } /* -- cgit v1.2.3 From 729757b96a662d87e334fe8b837707800d8fd551 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 5 Jan 2026 09:29:38 -0500 Subject: tracing: Add show_event_filters to expose active event filters Currently, to audit active Ftrace event filters, userspace must recursively traverse the events/ directory and read each individual filter file. This is inefficient for monitoring tools and debugging. Introduce "show_event_filters" at the trace root directory. This file displays all events that currently have a filter applied, alongside the actual filter string, in a consolidated system:event [tab] filter format. The implementation reuses the existing trace_event_file iterators to ensure atomic traversal of the event list and utilises guard(rcu)() for automatic, scope-based protection when accessing volatile filter strings. Link: https://patch.msgid.link/20260105142939.2655342-2-atomlin@atomlin.com Signed-off-by: Aaron Tomlin Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/ftrace.rst | 8 ++++++ kernel/trace/trace_events.c | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'kernel/trace') diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 639f4d95732f..4ce01e726b09 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -684,6 +684,14 @@ of ftrace. Here is a list of some of the key files: See events.rst for more information. + show_event_filters: + + A list of events that have filters. This shows the + system/event pair along with the filter that is attached to + the event. + + See events.rst for more information. + available_events: A list of events that can be enabled in tracing. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 137b4d9bb116..6cbd36508368 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1662,6 +1662,32 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&event_mutex); } +/** + * t_show_filters - seq_file callback to display active event filters + * @m: The seq_file interface for formatted output + * @v: The current trace_event_file being iterated + * + * Identifies and prints active filters for the current event file in the + * iteration. If a filter is applied to the current event and, if so, + * prints the system name, event name, and the filter string. 
+ */ +static int t_show_filters(struct seq_file *m, void *v) +{ + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; + struct event_filter *filter; + + guard(rcu)(); + filter = rcu_dereference(file->filter); + if (!filter || !filter->filter_string) + return 0; + + seq_printf(m, "%s:%s\t%s\n", call->class->system, + trace_event_name(call), filter->filter_string); + + return 0; +} + #ifdef CONFIG_MODULES static int s_show(struct seq_file *m, void *v) { @@ -2489,6 +2515,7 @@ ftrace_event_npid_write(struct file *filp, const char __user *ubuf, static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_show_filters_open(struct inode *inode, struct file *file); static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); static int ftrace_event_release(struct inode *inode, struct file *file); @@ -2507,6 +2534,13 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = s_stop, }; +static const struct seq_operations show_show_event_filters_seq_ops = { + .start = t_start, + .next = t_next, + .show = t_show_filters, + .stop = t_stop, +}; + static const struct seq_operations show_set_pid_seq_ops = { .start = p_start, .next = p_next, @@ -2536,6 +2570,13 @@ static const struct file_operations ftrace_set_event_fops = { .release = ftrace_event_release, }; +static const struct file_operations ftrace_show_event_filters_fops = { + .open = ftrace_event_show_filters_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static const struct file_operations ftrace_set_event_pid_fops = { .open = ftrace_event_set_pid_open, .read = seq_read, @@ -2680,6 +2721,20 @@ ftrace_event_set_open(struct inode *inode, struct file *file) return ret; } +/** + * ftrace_event_show_filters_open - open interface for set_event_filters + * @inode: The inode of the file + * @file: The file being opened + * + * Connects the set_event_filters file to the sequence operations + * required to iterate over and display active event filters. + */ +static int +ftrace_event_show_filters_open(struct inode *inode, struct file *file) +{ + return ftrace_event_open(inode, file, &show_show_event_filters_seq_ops); +} + static int ftrace_event_set_pid_open(struct inode *inode, struct file *file) { @@ -4400,6 +4455,9 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) if (!entry) return -ENOMEM; + trace_create_file("show_event_filters", TRACE_MODE_READ, parent, tr, + &ftrace_show_event_filters_fops); + nr_entries = ARRAY_SIZE(events_entries); e_events = eventfs_create_events_dir("events", parent, events_entries, -- cgit v1.2.3 From 6a80838814eea232a83fab3ac33282cd1243da5b Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 5 Jan 2026 09:29:39 -0500 Subject: tracing: Add show_event_triggers to expose active event triggers To audit active event triggers, userspace currently must traverse the events/ directory and read each individual trigger file. This is cumbersome for system-wide auditing or debugging. Introduce "show_event_triggers" at the trace root directory. This file displays all events that currently have one or more triggers applied, alongside the trigger configuration, in a consolidated system:event [tab] trigger format. 
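For example, with a plain traceoff trigger attached, the file is expected to read roughly like this (illustrative session, not captured output):

    ~# echo 'traceoff' > /sys/kernel/tracing/events/sched/sched_switch/trigger
    ~# cat /sys/kernel/tracing/show_event_triggers
    sched:sched_switch	traceoff:unlimited
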
The implementation leverages the existing trace_event_file iterators and uses the trigger's own print() operation to ensure output consistency with the per-event trigger files. Link: https://patch.msgid.link/20260105142939.2655342-3-atomlin@atomlin.com Signed-off-by: Aaron Tomlin Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/ftrace.rst | 8 ++++++ kernel/trace/trace_events.c | 64 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) (limited to 'kernel/trace') diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index 4ce01e726b09..b9efb148a5c2 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -692,6 +692,14 @@ of ftrace. Here is a list of some of the key files: See events.rst for more information. + show_event_triggers: + + A list of events that have triggers. This shows the + system/event pair along with the trigger that is attached to + the event. + + See events.rst for more information. + available_events: A list of events that can be enabled in tracing. diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6cbd36508368..36936697fa2a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1688,6 +1688,38 @@ static int t_show_filters(struct seq_file *m, void *v) return 0; } +/** + * t_show_triggers - seq_file callback to display active event triggers + * @m: The seq_file interface for formatted output + * @v: The current trace_event_file being iterated + * + * Iterates through the trigger list of the current event file and prints + * each active trigger's configuration using its associated print + * operation. + */ +static int t_show_triggers(struct seq_file *m, void *v) +{ + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; + struct event_trigger_data *data; + + /* + * The event_mutex is held by t_start(), protecting the + * file->triggers list traversal. 
+ */ + if (list_empty(&file->triggers)) + return 0; + + list_for_each_entry_rcu(data, &file->triggers, list) { + seq_printf(m, "%s:%s\t", call->class->system, + trace_event_name(call)); + + data->cmd_ops->print(m, data); + } + + return 0; +} + #ifdef CONFIG_MODULES static int s_show(struct seq_file *m, void *v) { @@ -2516,6 +2548,7 @@ ftrace_event_npid_write(struct file *filp, const char __user *ubuf, static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); static int ftrace_event_show_filters_open(struct inode *inode, struct file *file); +static int ftrace_event_show_triggers_open(struct inode *inode, struct file *file); static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); static int ftrace_event_release(struct inode *inode, struct file *file); @@ -2541,6 +2574,13 @@ static const struct seq_operations show_show_event_filters_seq_ops = { .stop = t_stop, }; +static const struct seq_operations show_show_event_triggers_seq_ops = { + .start = t_start, + .next = t_next, + .show = t_show_triggers, + .stop = t_stop, +}; + static const struct seq_operations show_set_pid_seq_ops = { .start = p_start, .next = p_next, @@ -2577,6 +2617,13 @@ static const struct file_operations ftrace_show_event_filters_fops = { .release = seq_release, }; +static const struct file_operations ftrace_show_event_triggers_fops = { + .open = ftrace_event_show_triggers_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static const struct file_operations ftrace_set_event_pid_fops = { .open = ftrace_event_set_pid_open, .read = seq_read, @@ -2735,6 +2782,20 @@ ftrace_event_show_filters_open(struct inode *inode, struct file *file) return ftrace_event_open(inode, file, &show_show_event_filters_seq_ops); } +/** + * ftrace_event_show_triggers_open - open interface for show_event_triggers + * @inode: The inode of the file + * @file: The file being opened + * + * Connects the show_event_triggers file to the sequence operations + * required to iterate over and display active event triggers. + */ +static int +ftrace_event_show_triggers_open(struct inode *inode, struct file *file) +{ + return ftrace_event_open(inode, file, &show_show_event_triggers_seq_ops); +} + static int ftrace_event_set_pid_open(struct inode *inode, struct file *file) { @@ -4458,6 +4519,9 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) trace_create_file("show_event_filters", TRACE_MODE_READ, parent, tr, &ftrace_show_event_filters_fops); + trace_create_file("show_event_triggers", TRACE_MODE_READ, parent, tr, + &ftrace_show_event_triggers_fops); + nr_entries = ARRAY_SIZE(events_entries); e_events = eventfs_create_events_dir("events", parent, events_entries, -- cgit v1.2.3 From e4ef389e761bc37904f5cf64b99af5c6c603f2ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 7 Jan 2026 16:15:10 -0500 Subject: tracing: Check the return value of tracing_update_buffers() In the very unlikely event that tracing_update_buffers() fails in trace_printk_init_buffers(), report the failure so that it is known. 
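If the expansion does fail, the buffers stay at their minimal boot-time size; they can still be grown later from user space (a usage note, not part of the patch), for example:

    ~# echo 4096 > /sys/kernel/tracing/buffer_size_kb
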
Link: https://lore.kernel.org/all/20220917020353.3836285-1-floridsleeves@gmail.com/ Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260107161510.4dc98b15@gandalf.local.home Suggested-by: Li Zhong Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8bd4ec08fb36..870205cba31e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3309,9 +3309,10 @@ void trace_printk_init_buffers(void) pr_warn("**********************************************************\n"); /* Expand the buffers to set size */ - tracing_update_buffers(&global_trace); - - buffers_allocated = 1; + if (tracing_update_buffers(&global_trace) < 0) + pr_err("Failed to expand tracing buffers for trace_printk() calls\n"); + else + buffers_allocated = 1; /* * trace_printk_init_buffers() can be called by modules. -- cgit v1.2.3 From 8aa76aa415897f6c1ba47d9f131fa463499c4169 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 8 Jan 2026 14:21:32 +0100 Subject: ring-buffer: Use a housekeeping CPU to wake up waiters Avoid running the wakeup irq_work on an isolated CPU. Since the wakeup can run on any CPU, let's pick a housekeeping CPU to do the job. This change reduces additional noise when tracing isolated CPUs. For example, the following ipi_send_cpu stack trace was captured with nohz_full=2 on the isolated CPU: -0 [002] d.h4. 1255.379293: ipi_send_cpu: cpu=2 callsite=irq_work_queue+0x2d/0x50 callback=rb_wake_up_waiters+0x0/0x80 -0 [002] d.h4. 1255.379329: => trace_event_raw_event_ipi_send_cpu => __irq_work_queue_local => irq_work_queue => ring_buffer_unlock_commit => trace_buffer_unlock_commit_regs => trace_event_buffer_commit => trace_event_raw_event_x86_irq_vector => __sysvec_apic_timer_interrupt => sysvec_apic_timer_interrupt => asm_sysvec_apic_timer_interrupt => pv_native_safe_halt => default_idle => default_idle_call => do_idle => cpu_startup_entry => start_secondary => common_startup_64 The IRQ work interrupt alone adds considerable noise, but the impact can get even worse with PREEMPT_RT, because the IRQ work interrupt is then handled by a separate kernel thread. This requires a task switch and makes tracing useless for analyzing latency on an isolated CPU. After applying the patch, the trace is similar, but ipi_send_cpu always targets a non-isolated CPU. Unfortunately, irq_work_queue_on() is not NMI-safe. When running in NMI context, fall back to queuing the irq work on the local CPU. 
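The same technique could be reused by other wakeup producers that must not disturb isolated CPUs; a minimal sketch under the same assumptions (my_wakeup_fn() and kick_wakeup() are placeholder names):

    #include <linux/irq_work.h>
    #include <linux/sched/isolation.h>

    static void my_wakeup_fn(struct irq_work *work)
    {
            /* wake up waiters, update counters, ... */
    }
    static DEFINE_IRQ_WORK(my_wakeup, my_wakeup_fn);

    static void kick_wakeup(void)
    {
            /* irq_work_queue_on() is not NMI-safe; fall back to the local CPU */
            if (in_nmi()) {
                    irq_work_queue(&my_wakeup);
                    return;
            }
            irq_work_queue_on(&my_wakeup,
                              housekeeping_any_cpu(HK_TYPE_KERNEL_NOISE));
    }
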
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Sebastian Andrzej Siewior Cc: Clark Williams Cc: Frederic Weisbecker Link: https://patch.msgid.link/20260108132132.2473515-1-ptesarik@suse.com Signed-off-by: Petr Tesarik Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 630221b00838..d33103408955 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -4,6 +4,7 @@ * * Copyright (C) 2008 Steven Rostedt */ +#include #include #include #include @@ -4013,19 +4014,36 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer) rb_end_commit(cpu_buffer); } +static bool +rb_irq_work_queue(struct rb_irq_work *irq_work) +{ + int cpu; + + /* irq_work_queue_on() is not NMI-safe */ + if (unlikely(in_nmi())) + return irq_work_queue(&irq_work->work); + + /* + * If CPU isolation is not active, cpu is always the current + * CPU, and the following is equivallent to irq_work_queue(). + */ + cpu = housekeeping_any_cpu(HK_TYPE_KERNEL_NOISE); + return irq_work_queue_on(&irq_work->work, cpu); +} + static __always_inline void rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { if (buffer->irq_work.waiters_pending) { buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&buffer->irq_work.work); + rb_irq_work_queue(&buffer->irq_work); } if (cpu_buffer->irq_work.waiters_pending) { cpu_buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); + rb_irq_work_queue(&cpu_buffer->irq_work); } if (cpu_buffer->last_pages_touch == local_read(&cpu_buffer->pages_touched)) @@ -4045,7 +4063,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->irq_work.wakeup_full = true; cpu_buffer->irq_work.full_waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ - irq_work_queue(&cpu_buffer->irq_work.work); + rb_irq_work_queue(&cpu_buffer->irq_work); } #ifdef CONFIG_RING_BUFFER_RECORD_RECURSION -- cgit v1.2.3 From 2d8b7f9bf8e6e7ae4e5a457bbaee2f84cdfd61f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 Jan 2026 15:34:08 -0500 Subject: tracing: Have show_event_trigger/filter format a bit more in columns By doing: # trace-cmd sqlhist -e -n futex_wait select TIMESTAMP_DELTA_USECS as lat from sys_enter_futex as start join sys_exit_futex as end on start.common_pid = end.common_pid and # trace-cmd start -e futex_wait -f 'lat > 100' -e page_pool_state_release -f 'pfn == 1' The output of the show_event_trigger and show_event_filter files are well aligned because of the inconsistent 'tab' spacing: ~# cat /sys/kernel/tracing/show_event_triggers syscalls:sys_exit_futex hist:keys=common_pid:vals=hitcount:__lat_12046_2=common_timestamp.usecs-$__arg_12046_1:sort=hitcount:size=2048:clock=global:onmatch(syscalls.sys_enter_futex).trace(futex_wait,$__lat_12046_2) [active] syscalls:sys_enter_futex hist:keys=common_pid:vals=hitcount:__arg_12046_1=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active] ~# cat /sys/kernel/tracing/show_event_filters synthetic:futex_wait (lat > 100) page_pool:page_pool_state_release (pfn == 1) This makes it not so easy to read. 
Instead, force the spacing to be at least 32 bytes from the beginning (one space if the system:event is longer than 30 bytes): ~# cat /sys/kernel/tracing/show_event_triggers syscalls:sys_exit_futex hist:keys=common_pid:vals=hitcount:__lat_8125_2=common_timestamp.usecs-$__arg_8125_1:sort=hitcount:size=2048:clock=global:onmatch(syscalls.sys_enter_futex).trace(futex_wait,$__lat_8125_2) [active] syscalls:sys_enter_futex hist:keys=common_pid:vals=hitcount:__arg_8125_1=common_timestamp.usecs:sort=hitcount:size=2048:clock=global [active] ~# cat /sys/kernel/tracing/show_event_filters synthetic:futex_wait (lat > 100) page_pool:page_pool_state_release (pfn == 1) Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260112153408.18373e73@gandalf.local.home Reviewed-by: Aaron Tomlin Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 36936697fa2a..f372a6374164 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1662,6 +1662,18 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&event_mutex); } +static int get_call_len(struct trace_event_call *call) +{ + int len; + + /* Get the length of ":" */ + len = strlen(call->class->system) + 1; + len += strlen(trace_event_name(call)); + + /* Set the index to 32 bytes to separate event from data */ + return len >= 32 ? 1 : 32 - len; +} + /** * t_show_filters - seq_file callback to display active event filters * @m: The seq_file interface for formatted output @@ -1676,14 +1688,17 @@ static int t_show_filters(struct seq_file *m, void *v) struct trace_event_file *file = v; struct trace_event_call *call = file->event_call; struct event_filter *filter; + int len; guard(rcu)(); filter = rcu_dereference(file->filter); if (!filter || !filter->filter_string) return 0; - seq_printf(m, "%s:%s\t%s\n", call->class->system, - trace_event_name(call), filter->filter_string); + len = get_call_len(call); + + seq_printf(m, "%s:%s%*.s%s\n", call->class->system, + trace_event_name(call), len, "", filter->filter_string); return 0; } @@ -1702,6 +1717,7 @@ static int t_show_triggers(struct seq_file *m, void *v) struct trace_event_file *file = v; struct trace_event_call *call = file->event_call; struct event_trigger_data *data; + int len; /* * The event_mutex is held by t_start(), protecting the @@ -1710,9 +1726,11 @@ static int t_show_triggers(struct seq_file *m, void *v) if (list_empty(&file->triggers)) return 0; + len = get_call_len(call); + list_for_each_entry_rcu(data, &file->triggers, list) { - seq_printf(m, "%s:%s\t", call->class->system, - trace_event_name(call)); + seq_printf(m, "%s:%s%*.s", call->class->system, + trace_event_name(call), len, ""); data->cmd_ops->print(m, data); } -- cgit v1.2.3 From a9e0c5897a787751c373812ea21fabf955625b34 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 13 Jan 2026 07:22:43 -0800 Subject: ftrace: Introduce and use ENTRIES_PER_PAGE_GROUP macro ENTRIES_PER_PAGE_GROUP() returns the number of dyn_ftrace entries in a page group, identified by its order. No functional change. 
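For a concrete sense of the numbers (illustrative; sizeof(struct dyn_ftrace) depends on the architecture and config): with 4 KiB pages and a 16-byte entry,

    ENTRIES_PER_PAGE_GROUP(0) = (4096 << 0) / 16 =  256
    ENTRIES_PER_PAGE_GROUP(2) = (4096 << 2) / 16 = 1024

so larger-order page groups hold proportionally more dyn_ftrace records.
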
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260113152243.3557219-2-linux@roeck-us.net Signed-off-by: Guenter Roeck Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index aa758efc3731..df4ce244202e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1148,6 +1148,7 @@ struct ftrace_page { }; #define ENTRY_SIZE sizeof(struct dyn_ftrace) +#define ENTRIES_PER_PAGE_GROUP(order) ((PAGE_SIZE << (order)) / ENTRY_SIZE) static struct ftrace_page *ftrace_pages_start; static struct ftrace_page *ftrace_pages; @@ -3862,7 +3863,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count, *num_pages += 1 << order; ftrace_number_of_groups++; - cnt = (PAGE_SIZE << order) / ENTRY_SIZE; + cnt = ENTRIES_PER_PAGE_GROUP(order); pg->order = order; if (cnt > count) @@ -7309,7 +7310,7 @@ static int ftrace_process_locs(struct module *mod, long skip; /* Count the number of entries unused and compare it to skipped. */ - pg_remaining = (PAGE_SIZE << pg->order) / ENTRY_SIZE - pg->index; + pg_remaining = ENTRIES_PER_PAGE_GROUP(pg->order) - pg->index; if (!WARN(skipped < pg_remaining, "Extra allocated pages for ftrace")) { @@ -7317,7 +7318,7 @@ static int ftrace_process_locs(struct module *mod, for (pg = pg_unuse; pg && skip > 0; pg = pg->next) { remaining += 1 << pg->order; - skip -= (PAGE_SIZE << pg->order) / ENTRY_SIZE; + skip -= ENTRIES_PER_PAGE_GROUP(pg->order); } pages -= remaining; -- cgit v1.2.3 From 6bdf07302f42783345289caec7d91fa364e013ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 21 Jan 2026 09:38:58 -0500 Subject: tracing: Disable trace_printk buffer on warning too When /proc/sys/kernel/traceoff_on_warning is set to 1, the top level tracing buffer is disabled when a warning happens. This is very useful when debugging and want the tracing buffer to stop taking new data when a warning triggers keeping the events that lead up to the warning from being overwritten. Now that there is also a persistent ring buffer and an option to have trace_printk go to that buffer, the same holds true for that buffer. A warning could happen just before a crash but still write enough events to lose the events that lead up to the first warning that was the reason for the crash. When /proc/sys/kernel/traceoff_on_warning is set to 1 and a warning is triggered, not only disable the top level tracing buffer, but also disable the buffer that trace_printk()s are written to. 
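As before, the behaviour is only active when the existing knob is set (usage reminder, not part of the patch):

    ~# echo 1 > /proc/sys/kernel/traceoff_on_warning

or by booting with traceoff_on_warning on the kernel command line.
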
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Link: https://patch.msgid.link/20260121093858.5c5d7e7b@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 870205cba31e..396d59202438 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1666,9 +1666,18 @@ EXPORT_SYMBOL_GPL(tracing_off); void disable_trace_on_warning(void) { if (__disable_trace_on_warning) { + struct trace_array *tr = READ_ONCE(printk_trace); + trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_, "Disabling tracing due to warning\n"); tracing_off(); + + /* Disable trace_printk() buffer too */ + if (tr != &global_trace) { + trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, + "Disabling tracing due to warning\n"); + tracer_tracing_off(tr); + } } } -- cgit v1.2.3 From 45641096c9c3eb8213616df50beaa5f92b201876 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Jan 2026 20:38:22 -0500 Subject: tracing: Have hist_debug show what function a field uses When CONFIG_HIST_TRIGGERS_DEBUG is enabled, each trace event has a "hist_debug" file that explains the histogram internal data. This is very useful for debugging histograms. One bit of data that was missing from this file was what function a histogram field uses to process its data. The hist_field structure now has a fn_num that is used by a switch statement in hist_fn_call() to call a function directly (to avoid spectre mitigations). Instead of displaying that number, create a string array that maps to the histogram function enums so that the function for a field may be displayed: ~# cat /sys/kernel/tracing/events/sched/sched_switch/hist_debug [..] hist_data: 0000000043d62762 n_vals: 2 n_keys: 1 n_fields: 3 val fields: hist_data->fields[0]: flags: VAL: HIST_FIELD_FL_HITCOUNT type: u64 size: 8 is_signed: 0 function: hist_field_counter() hist_data->fields[1]: flags: HIST_FIELD_FL_VAR var.name: __arg_3921_2 var.idx (into tracing_map_elt.vars[]): 0 type: unsigned long[] size: 128 is_signed: 0 function: hist_field_nop() key fields: hist_data->fields[2]: flags: HIST_FIELD_FL_KEY ftrace_event_field name: prev_pid type: pid_t size: 8 is_signed: 1 function: hist_field_s32() The "function:" field above is added. 
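The conversion relies on the common C "X macro" idiom: a single list is expanded twice, once into the enum and once into the matching string table, so the two can never drift apart. A generic sketch of the technique (the shape names are made up for illustration):

    #define SHAPE_LIST               \
            C(CIRCLE,   "circle"),   \
            C(SQUARE,   "square"),   \
            C(TRIANGLE, "triangle"),

    #undef C
    #define C(a, b) SHAPE_##a
    enum shape { SHAPE_LIST };

    #undef C
    #define C(a, b) b
    static const char * const shape_names[] = { SHAPE_LIST };

    /* shape_names[SHAPE_SQUARE] == "square" */
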
Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260122203822.58df4d80@gandalf.local.home Reviewed-by: Tom Zanussi Tested-by: Tom Zanussi Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 75 +++++++++++++++++++++++----------------- 1 file changed, 44 insertions(+), 31 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0908a9f7e289..e245446a8cf7 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -105,38 +105,44 @@ enum field_op_id { FIELD_OP_MULT, }; +#define FIELD_FUNCS \ + C(NOP, "nop"), \ + C(VAR_REF, "var_ref"), \ + C(COUNTER, "counter"), \ + C(CONST, "const"), \ + C(LOG2, "log2"), \ + C(BUCKET, "bucket"), \ + C(TIMESTAMP, "timestamp"), \ + C(CPU, "cpu"), \ + C(COMM, "comm"), \ + C(STRING, "string"), \ + C(DYNSTRING, "dynstring"), \ + C(RELDYNSTRING, "reldynstring"), \ + C(PSTRING, "pstring"), \ + C(S64, "s64"), \ + C(U64, "u64"), \ + C(S32, "s32"), \ + C(U32, "u32"), \ + C(S16, "s16"), \ + C(U16, "u16"), \ + C(S8, "s8"), \ + C(U8, "u8"), \ + C(UMINUS, "uminus"), \ + C(MINUS, "minus"), \ + C(PLUS, "plus"), \ + C(DIV, "div"), \ + C(MULT, "mult"), \ + C(DIV_POWER2, "div_power2"), \ + C(DIV_NOT_POWER2, "div_not_power2"), \ + C(DIV_MULT_SHIFT, "div_mult_shift"), \ + C(EXECNAME, "execname"), \ + C(STACK, "stack"), + +#undef C +#define C(a, b) HIST_FIELD_FN_##a + enum hist_field_fn { - HIST_FIELD_FN_NOP, - HIST_FIELD_FN_VAR_REF, - HIST_FIELD_FN_COUNTER, - HIST_FIELD_FN_CONST, - HIST_FIELD_FN_LOG2, - HIST_FIELD_FN_BUCKET, - HIST_FIELD_FN_TIMESTAMP, - HIST_FIELD_FN_CPU, - HIST_FIELD_FN_COMM, - HIST_FIELD_FN_STRING, - HIST_FIELD_FN_DYNSTRING, - HIST_FIELD_FN_RELDYNSTRING, - HIST_FIELD_FN_PSTRING, - HIST_FIELD_FN_S64, - HIST_FIELD_FN_U64, - HIST_FIELD_FN_S32, - HIST_FIELD_FN_U32, - HIST_FIELD_FN_S16, - HIST_FIELD_FN_U16, - HIST_FIELD_FN_S8, - HIST_FIELD_FN_U8, - HIST_FIELD_FN_UMINUS, - HIST_FIELD_FN_MINUS, - HIST_FIELD_FN_PLUS, - HIST_FIELD_FN_DIV, - HIST_FIELD_FN_MULT, - HIST_FIELD_FN_DIV_POWER2, - HIST_FIELD_FN_DIV_NOT_POWER2, - HIST_FIELD_FN_DIV_MULT_SHIFT, - HIST_FIELD_FN_EXECNAME, - HIST_FIELD_FN_STACK, + FIELD_FUNCS }; /* @@ -5854,6 +5860,12 @@ const struct file_operations event_hist_fops = { }; #ifdef CONFIG_HIST_TRIGGERS_DEBUG + +#undef C +#define C(a, b) b + +static const char * const field_funcs[] = { FIELD_FUNCS }; + static void hist_field_debug_show_flags(struct seq_file *m, unsigned long flags) { @@ -5918,6 +5930,7 @@ static int hist_field_debug_show(struct seq_file *m, seq_printf(m, " type: %s\n", field->type); seq_printf(m, " size: %u\n", field->size); seq_printf(m, " is_signed: %u\n", field->is_signed); + seq_printf(m, " function: hist_field_%s()\n", field_funcs[field->fn_num]); return 0; } -- cgit v1.2.3 From ef742dc5f8cd941bd7ad7dda132458909cb298d2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Jan 2026 20:45:26 -0500 Subject: tracing: Remove notrace from trace_event_raw_event_synth() When debugging the synthetic events, being able to function trace its functions is very useful (now that CONFIG_FUNCTION_SELF_TRACING is available). For some reason trace_event_raw_event_synth() was marked as "notrace", which was totally unnecessary as all of the tracing directory had function tracing disabled until the recent FUNCTION_SELF_TRACING was added. Remove the notrace annotation from trace_event_raw_event_synth() as there's no reason to not trace it when tracing synthetic event functions. 
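With the annotation gone, the handler can be filtered and traced like any other function; an illustrative session (assumes function tracing is enabled in the kernel config):

    ~# cd /sys/kernel/tracing
    ~# echo trace_event_raw_event_synth > set_ftrace_filter
    ~# echo function > current_tracer
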
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260122204526.068a98c9@gandalf.local.home Acked-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 45c187e77e21..ce42fbf16f4a 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -499,9 +499,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry, return len; } -static notrace void trace_event_raw_event_synth(void *__data, - u64 *var_ref_vals, - unsigned int *var_ref_idx) +static void trace_event_raw_event_synth(void *__data, + u64 *var_ref_vals, + unsigned int *var_ref_idx) { unsigned int i, n_u64, val_idx, len, data_size = 0; struct trace_event_file *trace_file = __data; -- cgit v1.2.3 From e62750b6ab4d57f6cf4ea1550de8d2e111adb675 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 23 Jan 2026 10:54:15 -0500 Subject: tracing: Up the hist stacktrace size from 16 to 31 Recording stacktraces is very useful, but the size of 16 deep is very restrictive. For example, in seeing where tasks schedule out in a non running state, the following can be used: ~# cd /sys/kernel/tracing ~# echo 'hist:keys=common_stacktrace:vals=hitcount if prev_state & 3' > events/sched/sched_switch/trigger ~# cat events/sched/sched_switch/hist [..] { common_stacktrace: __schedule+0xdc0/0x1860 schedule+0x27/0xd0 schedule_timeout+0xb5/0x100 wait_for_completion+0x8a/0x140 xfs_buf_iowait+0x20/0xd0 [xfs] xfs_buf_read_map+0x103/0x250 [xfs] xfs_trans_read_buf_map+0x161/0x310 [xfs] xfs_btree_read_buf_block+0xa0/0x120 [xfs] xfs_btree_lookup_get_block+0xa3/0x1e0 [xfs] xfs_btree_lookup+0xea/0x530 [xfs] xfs_alloc_fixup_trees+0x72/0x570 [xfs] xfs_alloc_ag_vextent_size+0x67f/0x800 [xfs] xfs_alloc_vextent_iterate_ags.constprop.0+0x52/0x230 [xfs] xfs_alloc_vextent_start_ag+0x9d/0x1b0 [xfs] xfs_bmap_btalloc+0x2af/0x680 [xfs] xfs_bmapi_allocate+0xdb/0x2c0 [xfs] } hitcount: 1 [..] The above stops at 16 functions where knowing more would be useful. As the allocated storage for stacks is the same for strings, and that size is 256 bytes, there is a lot of space not being used for stacktraces. 16 * 8 = 128 Up the size to 31 (it requires the last slot to be zero, so it can't be 32). Also change the BUILD_BUG_ON() to allow the size of the stacktrace storage to be equal to the max size. One slot is used to hold the number of elements in the stack. BUILD_BUG_ON((HIST_STACKTRACE_DEPTH + 1) * sizeof(long) >= STR_VAR_LEN_MAX); Change that from ">=" to just ">", as now they are equal. 
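On 64-bit, the new bound works out exactly to the string storage size (one extra slot holds the number of entries):

    (HIST_STACKTRACE_DEPTH + 1) * sizeof(long) = (31 + 1) * 8 = 256 = STR_VAR_LEN_MAX

which is why the comparison may now reach equality.
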
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260123105415.2be26bf4@gandalf.local.home Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.h | 2 +- kernel/trace/trace_events_hist.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8888fc9335b6..69e7defba6c6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -128,7 +128,7 @@ enum trace_type { #define FAULT_STRING "(fault)" -#define HIST_STACKTRACE_DEPTH 16 +#define HIST_STACKTRACE_DEPTH 31 #define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long)) #define HIST_STACKTRACE_SKIP 5 diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index e245446a8cf7..0fc641461be5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3163,7 +3163,7 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, u64 var_val; /* Make sure stacktrace can fit in the string variable length */ - BUILD_BUG_ON((HIST_STACKTRACE_DEPTH + 1) * sizeof(long) >= STR_VAR_LEN_MAX); + BUILD_BUG_ON((HIST_STACKTRACE_DEPTH + 1) * sizeof(long) > STR_VAR_LEN_MAX); for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { struct field_var *field_var = field_vars[i]; -- cgit v1.2.3 From 9df0e49c5b9b8d051529be9994e4f92f2d20be6f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 26 Jan 2026 13:00:37 -0500 Subject: tracing: Remove duplicate ENABLE_EVENT_STR and DISABLE_EVENT_STR macros The macros ENABLE_EVENT_STR and DISABLE_EVENT_STR were added to trace.h so that more than one file can have access to them, but was never removed from their original location. Remove the duplicates. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Tom Zanussi Link: https://patch.msgid.link/20260126130037.4ba201f9@gandalf.local.home Fixes: d0bad49bb0a09 ("tracing: Add enable_hist/disable_hist triggers") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f372a6374164..4972e1a2b5f3 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -4097,11 +4097,6 @@ void trace_put_event_file(struct trace_event_file *file) EXPORT_SYMBOL_GPL(trace_put_event_file); #ifdef CONFIG_DYNAMIC_FTRACE - -/* Avoid typos */ -#define ENABLE_EVENT_STR "enable_event" -#define DISABLE_EVENT_STR "disable_event" - struct event_probe_data { struct trace_event_file *file; unsigned long count; -- cgit v1.2.3 From f7d327654b886a768fb3baa5b43d151b63e2d4ff Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 26 Jan 2026 18:11:47 -0500 Subject: bpf: Have __bpf_trace_run() use rcu_read_lock_dont_migrate() In order to switch the protection of tracepoint callbacks from preempt_disable() to srcu_read_lock_fast() the BPF callback from tracepoints needs to have migration prevention as the BPF programs expect to stay on the same CPU as they execute. Put together the RCU protection with migration prevention and use rcu_read_lock_dont_migrate() in __bpf_trace_run(). This will allow tracepoints callbacks to be preemptible. Link: https://lore.kernel.org/all/CAADnVQKvY026HSFGOsavJppm3-Ajm-VsLzY-OeFUe+BaKMRnDg@mail.gmail.com/ Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: "Paul E. 
McKenney" Cc: Sebastian Andrzej Siewior Cc: Alexei Starovoitov Link: https://patch.msgid.link/20260126231256.335034877@kernel.org Suggested-by: Alexei Starovoitov Signed-off-by: Steven Rostedt (Google) --- kernel/trace/bpf_trace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index fe28d86f7c35..abbf0177ad20 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2062,7 +2062,7 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) struct bpf_run_ctx *old_run_ctx; struct bpf_trace_run_ctx run_ctx; - cant_sleep(); + rcu_read_lock_dont_migrate(); if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); goto out; @@ -2071,13 +2071,12 @@ void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) run_ctx.bpf_cookie = link->cookie; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - rcu_read_lock(); (void) bpf_prog_run(prog, args); - rcu_read_unlock(); bpf_reset_run_ctx(old_run_ctx); out: this_cpu_dec(*(prog->active)); + rcu_read_unlock_migrate(); } #define UNPACK(...) __VA_ARGS__ -- cgit v1.2.3 From 02b75ece53bb6e7b75b987d5728949451d1dc8a9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 30 Jan 2026 10:37:45 -0500 Subject: tracing: Add kerneldoc to trace_event_buffer_reserve() Add a appropriate kerneldoc to trace_event_buffer_reserve() to make it easier to understand how that function is used. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260130103745.1126e4af@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4972e1a2b5f3..af6d1fe5cab7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -649,6 +649,22 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) } EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); +/** + * trace_event_buffer_reserve - reserve space on the ring buffer for an event + * @fbuffer: information about how to save the event + * @trace_file: the instance file descriptor for the event + * @len: The length of the event + * + * The @fbuffer has information about the ring buffer and data will + * be added to it to be used by the call to trace_event_buffer_commit(). + * The @trace_file is the desrciptor with information about the status + * of the given event for a specific trace_array instance. + * The @len is the length of data to save for the event. + * + * Returns a pointer to the data on the ring buffer or NULL if the + * event was not reserved (event was filtered, too big, or the buffer + * simply was disabled for write). + */ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len) -- cgit v1.2.3 From 1c48f7ab72a8c9d6419622931e622e5247e979f5 Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Wed, 4 Feb 2026 09:53:44 +0800 Subject: tracing: Rename `eval_map_wq` and allow other parts of tracing use it The eval_map_work_func() function, though queued in eval_map_wq, holds the trace_event_sem read-write lock for a long time during kernel boot. This causes blocking issues for other functions. 
Rename eval_map_wq to trace_init_wq and make it global, thereby allowing other parts of tracing to schedule work on this queue asynchronously and avoiding blockage of the main boot thread. Link: https://patch.msgid.link/20260204015344.162818-1-tianyaxiong@kylinos.cn Suggested-by: Steven Rostedt Acked-by: Masami Hiramatsu (Google) Signed-off-by: Yaxiong Tian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 18 +++++++++--------- kernel/trace/trace.h | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 396d59202438..8c0f3cfd196b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10785,7 +10785,7 @@ int tracing_init_dentry(void) extern struct trace_eval_map *__start_ftrace_eval_maps[]; extern struct trace_eval_map *__stop_ftrace_eval_maps[]; -static struct workqueue_struct *eval_map_wq __initdata; +struct workqueue_struct *trace_init_wq __initdata; static struct work_struct eval_map_work __initdata; static struct work_struct tracerfs_init_work __initdata; @@ -10801,15 +10801,15 @@ static int __init trace_eval_init(void) { INIT_WORK(&eval_map_work, eval_map_work_func); - eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0); - if (!eval_map_wq) { - pr_err("Unable to allocate eval_map_wq\n"); + trace_init_wq = alloc_workqueue("trace_init_wq", WQ_UNBOUND, 0); + if (!trace_init_wq) { + pr_err("Unable to allocate trace_init_wq\n"); /* Do work here */ eval_map_work_func(&eval_map_work); return -ENOMEM; } - queue_work(eval_map_wq, &eval_map_work); + queue_work(trace_init_wq, &eval_map_work); return 0; } @@ -10818,8 +10818,8 @@ subsys_initcall(trace_eval_init); static int __init trace_eval_sync(void) { /* Make sure the eval map updates are finished */ - if (eval_map_wq) - destroy_workqueue(eval_map_wq); + if (trace_init_wq) + destroy_workqueue(trace_init_wq); return 0; } @@ -10980,9 +10980,9 @@ static __init int tracer_init_tracefs(void) if (ret) return 0; - if (eval_map_wq) { + if (trace_init_wq) { INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func); - queue_work(eval_map_wq, &tracerfs_init_work); + queue_work(trace_init_wq, &tracerfs_init_work); } else { tracer_init_tracefs_work_func(NULL); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 69e7defba6c6..bb68539c64b7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -769,6 +769,7 @@ extern cpumask_var_t __read_mostly tracing_buffer_mask; extern unsigned long nsecs_to_usecs(unsigned long nsecs); extern unsigned long tracing_thresh; +extern struct workqueue_struct *trace_init_wq __initdata; /* PID filtering */ -- cgit v1.2.3 From 0c2580a8094693578afa9b6cbcee406cf131920e Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Wed, 4 Feb 2026 09:53:53 +0800 Subject: blktrace: Make init_blk_tracer() asynchronous The init_blk_tracer() function causes significant boot delay as it waits for the trace_event_sem lock held by trace_event_update_all(). Specifically, its child function register_trace_event() requires this lock, which is occupied for an extended period during boot. To resolve this, the execution of primary init_blk_tracer() is moved to the trace_init_wq workqueue, allowing it to run asynchronously, and prevent blocking the main boot thread. 
Link: https://patch.msgid.link/20260204015353.163331-1-tianyaxiong@kylinos.cn Acked-by: Masami Hiramatsu (Google) Signed-off-by: Yaxiong Tian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/blktrace.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d031c8d80be4..d611cd1f02ef 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1832,7 +1832,9 @@ static struct trace_event trace_blk_event = { .funcs = &trace_blk_event_funcs, }; -static int __init init_blk_tracer(void) +static struct work_struct blktrace_works __initdata; + +static int __init __init_blk_tracer(void) { if (!register_trace_event(&trace_blk_event)) { pr_warn("Warning: could not register block events\n"); @@ -1852,6 +1854,25 @@ static int __init init_blk_tracer(void) return 0; } +static void __init blktrace_works_func(struct work_struct *work) +{ + __init_blk_tracer(); +} + +static int __init init_blk_tracer(void) +{ + int ret = 0; + + if (trace_init_wq) { + INIT_WORK(&blktrace_works, blktrace_works_func); + queue_work(trace_init_wq, &blktrace_works); + } else { + ret = __init_blk_tracer(); + } + + return ret; +} + device_initcall(init_blk_tracer); static int blk_trace_remove_queue(struct request_queue *q) -- cgit v1.2.3 From 2cdfe39dc9447a09c568da1b6351c70b770dd923 Mon Sep 17 00:00:00 2001 From: Yaxiong Tian Date: Wed, 4 Feb 2026 09:54:01 +0800 Subject: tracing/kprobes: Skip setup_boot_kprobe_events() when no cmdline event When the 'kprobe_event=' kernel command-line parameter is not provided, there is no need to execute setup_boot_kprobe_events(). This change optimizes the initialization function init_kprobe_trace() by skipping unnecessary work and effectively prevents potential blocking that could arise from contention on the event_mutex lock in subsequent operations. Link: https://patch.msgid.link/20260204015401.163748-1-tianyaxiong@kylinos.cn Acked-by: Masami Hiramatsu (Google) Signed-off-by: Yaxiong Tian Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_kprobe.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9953506370a5..89d2740f7bb5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -2048,6 +2048,10 @@ static __init int init_kprobe_trace(void) trace_create_file("kprobe_profile", TRACE_MODE_READ, NULL, NULL, &kprobe_profile_ops); + /* If no 'kprobe_event=' cmd is provided, return directly. */ + if (kprobe_boot_events_buf[0] == '\0') + return 0; + setup_boot_kprobe_events(); return 0; -- cgit v1.2.3 From c8b039c3e3763281c867489a926c52716337da59 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 6 Feb 2026 10:13:51 -0500 Subject: tracing: Have all triggers expect a file parameter When the triggers were first created, they may not have had a file parameter passed to them and things needed to be done generically. But today, all triggers have a file parameter passed to them. Remove the generic code and add a "if (WARN_ON_ONCE(!file))" to each trigger. 
Cc: Mathieu Desnoyers Acked-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Link: https://patch.msgid.link/20260206101351.609d8906@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_trigger.c | 62 ++++++++++++++----------------------- 1 file changed, 24 insertions(+), 38 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 06b75bcfc7b8..7fa26327c9c7 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1347,18 +1347,13 @@ traceon_trigger(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) { - if (tracer_tracing_is_on(file->tr)) - return; - - tracer_tracing_on(file->tr); + if (WARN_ON_ONCE(!file)) return; - } - if (tracing_is_on()) + if (tracer_tracing_is_on(file->tr)) return; - tracing_on(); + tracer_tracing_on(file->tr); } static bool @@ -1368,13 +1363,11 @@ traceon_count_func(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) { - if (tracer_tracing_is_on(file->tr)) - return false; - } else { - if (tracing_is_on()) - return false; - } + if (WARN_ON_ONCE(!file)) + return false; + + if (tracer_tracing_is_on(file->tr)) + return false; if (!data->count) return false; @@ -1392,18 +1385,13 @@ traceoff_trigger(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) { - if (!tracer_tracing_is_on(file->tr)) - return; - - tracer_tracing_off(file->tr); + if (WARN_ON_ONCE(!file)) return; - } - if (!tracing_is_on()) + if (!tracer_tracing_is_on(file->tr)) return; - tracing_off(); + tracer_tracing_off(file->tr); } static bool @@ -1413,13 +1401,11 @@ traceoff_count_func(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) { - if (!tracer_tracing_is_on(file->tr)) - return false; - } else { - if (!tracing_is_on()) - return false; - } + if (WARN_ON_ONCE(!file)) + return false; + + if (!tracer_tracing_is_on(file->tr)) + return false; if (!data->count) return false; @@ -1481,10 +1467,10 @@ snapshot_trigger(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) - tracing_snapshot_instance(file->tr); - else - tracing_snapshot(); + if (WARN_ON_ONCE(!file)) + return; + + tracing_snapshot_instance(file->tr); } static int @@ -1570,10 +1556,10 @@ stacktrace_trigger(struct event_trigger_data *data, { struct trace_event_file *file = data->private_data; - if (file) - __trace_stack(file->tr, tracing_gen_ctx_dec(), STACK_SKIP); - else - trace_dump_stack(STACK_SKIP); + if (WARN_ON_ONCE(!file)) + return; + + __trace_stack(file->tr, tracing_gen_ctx_dec(), STACK_SKIP); } static int -- cgit v1.2.3 From 326669faf3cbfda31b2203f0a66aa87812062e5f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 6 Feb 2026 14:37:52 -0500 Subject: tracing: Move tracing_set_filter_buffering() into trace_events_hist.c The function tracing_set_filter_buffering() is only used in trace_events_hist.c. Move it to that file and make it static. 
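The function keeps its reference-counted semantics after the move: each enable bumps tr->no_filter_buffering_ref and must eventually be balanced by a disable. A usage sketch from the hist trigger side (illustrative only, not a specific call site):

        /* Stop using the per CPU trace_buffer_event for this instance */
        tracing_set_filter_buffering(file->tr, true);

        /* ... lifetime of the hist trigger that needs it ... */

        /* Balanced on removal; returns -EINVAL if the count would underflow */
        tracing_set_filter_buffering(file->tr, false);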
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260206195936.617080218@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 20 -------------------- kernel/trace/trace.h | 1 - kernel/trace/trace_events_hist.c | 20 ++++++++++++++++++++ 3 files changed, 20 insertions(+), 21 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8c0f3cfd196b..702ef851db45 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7891,26 +7891,6 @@ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_eve return ring_buffer_event_time_stamp(buffer, rbe); } -/* - * Set or disable using the per CPU trace_buffer_event when possible. - */ -int tracing_set_filter_buffering(struct trace_array *tr, bool set) -{ - guard(mutex)(&trace_types_lock); - - if (set && tr->no_filter_buffering_ref++) - return 0; - - if (!set) { - if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) - return -EINVAL; - - --tr->no_filter_buffering_ref; - } - - return 0; -} - struct ftrace_buffer_info { struct trace_iterator iter; void *spare; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index bb68539c64b7..31fb137e1c66 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -476,7 +476,6 @@ extern struct trace_array *trace_array_find(const char *instance); extern struct trace_array *trace_array_find_get(const char *instance); extern u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe); -extern int tracing_set_filter_buffering(struct trace_array *tr, bool set); extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); extern bool trace_clock_in_ns(struct trace_array *tr); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0fc641461be5..e6f449f53afc 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6531,6 +6531,26 @@ static bool existing_hist_update_only(char *glob, return updated; } +/* + * Set or disable using the per CPU trace_buffer_event when possible. + */ +static int tracing_set_filter_buffering(struct trace_array *tr, bool set) +{ + guard(mutex)(&trace_types_lock); + + if (set && tr->no_filter_buffering_ref++) + return 0; + + if (!set) { + if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) + return -EINVAL; + + --tr->no_filter_buffering_ref; + } + + return 0; +} + static int hist_register_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) -- cgit v1.2.3 From ba73713da50e5c24499ca8941171593466ea34f7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 19:11:01 -0500 Subject: tracing: Clean up use of trace_create_maxlat_file() In trace.c, the function trace_create_maxlat_file() is defined behind the #ifdef CONFIG_TRACER_MAX_TRACE block. The #else part defines it as: #define trace_create_maxlat_file(tr, d_tracer) \ trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ d_tracer, tr, &tracing_max_lat_fops) But the one place that it it used has: #ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); #endif Which is pointless and also wrong! It only gets created when both CONFIG_TRACE_MAX_TRACE and CONFIG_FS_NOTIFY is defined, but the file itself should not be dependent on CONFIG_FS_NOTIFY. Always create that file when TRACE_MAX_TRACE is defined regardless if FS_NOTIFY is or is not. 
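The shape of the fix is the usual one for optional tracefs files: build the real function only when the tracer option is configured, keep the fsnotify setup inside it behind its own #ifdef, and supply an empty stub otherwise so the call site needs no guard. In general form (hypothetical "foo" names, for illustration; the real change follows below):

        #ifdef CONFIG_TRACER_FOO
        static void trace_create_foo_file(struct trace_array *tr,
                                          struct dentry *d_tracer)
        {
                trace_create_file("tracing_foo", TRACE_MODE_WRITE,
                                  d_tracer, tr, &tracing_foo_fops);
        }
        #else
        /* Empty stub so init_tracer_tracefs() can call it unconditionally */
        static inline void trace_create_foo_file(struct trace_array *tr,
                                                 struct dentry *d_tracer) { }
        #endif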
Cc: Mathieu Desnoyers Acked-by: Masami Hiramatsu (Google) Link: https://patch.msgid.link/20260207191101.0e014abd@robin Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 702ef851db45..d02c4004c718 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1912,10 +1912,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) unsigned long __read_mostly tracing_thresh; #ifdef CONFIG_TRACER_MAX_TRACE -static const struct file_operations tracing_max_lat_fops; - #ifdef LATENCY_FS_NOTIFY - static struct workqueue_struct *fsnotify_wq; static void latency_fsnotify_workfn(struct work_struct *work) @@ -1932,17 +1929,6 @@ static void latency_fsnotify_workfn_irq(struct irq_work *iwork) queue_work(fsnotify_wq, &tr->fsnotify_work); } -static void trace_create_maxlat_file(struct trace_array *tr, - struct dentry *d_tracer) -{ - INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); - init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); - tr->d_max_latency = trace_create_file("tracing_max_latency", - TRACE_MODE_WRITE, - d_tracer, tr, - &tracing_max_lat_fops); -} - __init static int latency_fsnotify_init(void) { fsnotify_wq = alloc_workqueue("tr_max_lat_wq", @@ -1967,14 +1953,22 @@ void latency_fsnotify(struct trace_array *tr) */ irq_work_queue(&tr->fsnotify_irqwork); } +#endif /* !LATENCY_FS_NOTIFY */ -#else /* !LATENCY_FS_NOTIFY */ - -#define trace_create_maxlat_file(tr, d_tracer) \ - trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ - d_tracer, tr, &tracing_max_lat_fops) +static const struct file_operations tracing_max_lat_fops; +static void trace_create_maxlat_file(struct trace_array *tr, + struct dentry *d_tracer) +{ +#ifdef LATENCY_FS_NOTIFY + INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); + init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); #endif + tr->d_max_latency = trace_create_file("tracing_max_latency", + TRACE_MODE_WRITE, + d_tracer, tr, + &tracing_max_lat_fops); +} /* * Copy the new maximum trace into the separate maximum-trace @@ -2109,7 +2103,9 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } - +#else /* !CONFIG_TRACER_MAX_TRACE */ +static inline void trace_create_maxlat_file(struct trace_array *tr, + struct dentry *d_tracer) { } #endif /* CONFIG_TRACER_MAX_TRACE */ struct pipe_wait { @@ -10664,9 +10660,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) create_trace_options_dir(tr); -#ifdef CONFIG_TRACER_MAX_TRACE trace_create_maxlat_file(tr, d_tracer); -#endif if (ftrace_create_function_files(tr, d_tracer)) MEM_FAIL(1, "Could not allocate function filter files"); -- cgit v1.2.3 From 64dee86ad7de3d59bae041e0d8f80ef89ddc4cf6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:18 -0500 Subject: tracing: Make tracing_disabled global for tracing system The tracing_disabled variable is set to one on boot up to prevent some parts of tracing to access the tracing infrastructure before it is set up. It also can be set after boot if an anomaly is discovered. It is currently a static variable in trace.c and can be accessed via a function call trace_is_disabled(). There's really no reason to use a function call as the tracing subsystem should be able to access it directly. 
By making the variable accessed directly, code can be moved out of trace.c without adding overhead of a function call to see if tracing is disabled or not. Make tracing_disabled global and remove the tracing_is_disabled() helper function. Also add some "unlikely()"s around tracing_disabled where it's checked in hot paths. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032449.483690153@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 13 ++++--------- kernel/trace/trace.h | 3 ++- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_kprobe.c | 2 +- 4 files changed, 8 insertions(+), 12 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d02c4004c718..1ff40c88e75c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -114,7 +114,7 @@ DEFINE_PER_CPU(bool, trace_taskinfo_save); * of the tracer is successful. But that is the only place that sets * this back to zero. */ -static int tracing_disabled = 1; +int tracing_disabled = 1; cpumask_var_t __read_mostly tracing_buffer_mask; @@ -3423,7 +3423,7 @@ int __trace_array_vprintk(struct trace_buffer *buffer, unsigned int trace_ctx; char *tbuffer; - if (tracing_disabled) + if (unlikely(tracing_disabled)) return 0; /* Don't pollute graph traces with trace_vprintk internals */ @@ -4765,11 +4765,6 @@ int tracing_open_generic(struct inode *inode, struct file *filp) return 0; } -bool tracing_is_disabled(void) -{ - return (tracing_disabled) ? true: false; -} - /* * Open and update trace_array ref count. * Must have the current trace_array passed to it. @@ -7609,7 +7604,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, unsigned long ip; char *buf; - if (tracing_disabled) + if (unlikely(tracing_disabled)) return -EINVAL; if (!(tr->trace_flags & TRACE_ITER(MARKERS))) @@ -7689,7 +7684,7 @@ tracing_mark_raw_write(struct file *filp, const char __user *ubuf, ssize_t written = -ENODEV; char *buf; - if (tracing_disabled) + if (unlikely(tracing_disabled)) return -EINVAL; if (!(tr->trace_flags & TRACE_ITER(MARKERS))) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 31fb137e1c66..433705bef480 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -657,6 +657,8 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu) return iter->buffer_iter ? 
iter->buffer_iter[cpu] : NULL; } +extern int tracing_disabled; + int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void tracing_reset_online_cpus(struct array_buffer *buf); @@ -668,7 +670,6 @@ int tracing_release_generic_tr(struct inode *inode, struct file *file); int tracing_open_file_tr(struct inode *inode, struct file *filp); int tracing_release_file_tr(struct inode *inode, struct file *filp); int tracing_single_release_file_tr(struct inode *inode, struct file *filp); -bool tracing_is_disabled(void); bool tracer_tracing_is_on(struct trace_array *tr); void tracer_tracing_on(struct trace_array *tr); void tracer_tracing_off(struct trace_array *tr); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index af6d1fe5cab7..61fe01dce7a6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2268,7 +2268,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) struct event_subsystem *system = NULL; int ret; - if (tracing_is_disabled()) + if (unlikely(tracing_disabled)) return -ENODEV; /* Make sure the system still exists */ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 89d2740f7bb5..061658518605 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -2083,7 +2083,7 @@ static __init int kprobe_trace_self_tests_init(void) struct trace_kprobe *tk; struct trace_event_file *file; - if (tracing_is_disabled()) + if (unlikely(tracing_disabled)) return -ENODEV; if (tracing_selftest_disabled) -- cgit v1.2.3 From a4f77ffc8eb6247ad00c53d297a145e47594ce76 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:19 -0500 Subject: tracing: Make tracing_selftest_running global to the tracing subsystem The file trace.c has become a catchall for most things tracing. Start making it smaller by breaking out various aspects into their own files. Make the variable tracing_selftest_running global so that it can be used by other files in the tracing subsystem and trace.c can be split up. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032449.648932796@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 3 +-- kernel/trace/trace.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1ff40c88e75c..f040ee4fe101 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -67,7 +67,7 @@ * insertions into the ring-buffer such as trace_printk could occurred * at the same time, giving false positive or negative results. */ -static bool __read_mostly tracing_selftest_running; +bool __read_mostly tracing_selftest_running; /* * If boot-time tracing including tracers/events via kernel cmdline @@ -83,7 +83,6 @@ void __init disable_tracing_selftest(const char *reason) } } #else -#define tracing_selftest_running 0 #define tracing_selftest_disabled 0 #endif diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 433705bef480..19cffc7b5852 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -863,6 +863,7 @@ extern int trace_selftest_startup_nop(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); +extern bool __read_mostly tracing_selftest_running; /* * Tracer data references selftest functions that only occur * on boot up. These can be __init functions. 
Thus, when selftests @@ -875,6 +876,7 @@ static inline void __init disable_tracing_selftest(const char *reason) } /* Tracers are seldom changed. Optimize when selftests are disabled. */ #define __tracer_data __read_mostly +#define tracing_selftest_running 0 #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -- cgit v1.2.3 From 0e730bc067e7a790d61344dbf6d9dfdce7f99ea3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:20 -0500 Subject: tracing: Move __trace_buffer_{un}lock_*() functions to trace.h The file trace.c has become a catchall for most things tracing. Start making it smaller by breaking out various aspects into their own files. Move the __always_inline functions __trace_buffer_lock_reserve(), __trace_buffer_unlock_commit() and trace_event_setup() into trace.h. The trace.c file will be split up and these functions will be used in more than one of these files. As they are already __always_inline they can easily be moved into the trace.h header file. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032449.813550600@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 42 ------------------------------------------ kernel/trace/trace.h | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 42 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f040ee4fe101..55cd0c774886 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1058,30 +1058,6 @@ static inline void ftrace_trace_stack(struct trace_array *tr, #endif -static __always_inline void -trace_event_setup(struct ring_buffer_event *event, - int type, unsigned int trace_ctx) -{ - struct trace_entry *ent = ring_buffer_event_data(event); - - tracing_generic_entry_update(ent, type, trace_ctx); -} - -static __always_inline struct ring_buffer_event * -__trace_buffer_lock_reserve(struct trace_buffer *buffer, - int type, - unsigned long len, - unsigned int trace_ctx) -{ - struct ring_buffer_event *event; - - event = ring_buffer_lock_reserve(buffer, len); - if (event != NULL) - trace_event_setup(event, type, trace_ctx); - - return event; -} - void tracer_tracing_on(struct trace_array *tr) { if (tr->array_buffer.buffer) @@ -1109,24 +1085,6 @@ void tracing_on(void) } EXPORT_SYMBOL_GPL(tracing_on); - -static __always_inline void -__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) -{ - __this_cpu_write(trace_taskinfo_save, true); - - /* If this is the temp buffer, we need to commit fully */ - if (this_cpu_read(trace_buffered_event) == event) { - /* Length is in event->array[0] */ - ring_buffer_write(buffer, event->array[0], &event->array[1]); - /* Release the temp buffer */ - this_cpu_dec(trace_buffered_event_cnt); - /* ring_buffer_unlock_commit() enables preemption */ - preempt_enable_notrace(); - } else - ring_buffer_unlock_commit(buffer); -} - int __trace_array_puts(struct trace_array *tr, unsigned long ip, const char *str, int size) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 19cffc7b5852..c2beabe96952 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1568,6 +1568,47 @@ char *trace_user_fault_read(struct trace_user_buf_info *tinfo, const char __user *ptr, size_t size, trace_user_buf_copy copy_func, void *data); +static __always_inline void +trace_event_setup(struct ring_buffer_event *event, + int type, unsigned int trace_ctx) +{ + 
struct trace_entry *ent = ring_buffer_event_data(event); + + tracing_generic_entry_update(ent, type, trace_ctx); +} + +static __always_inline struct ring_buffer_event * +__trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, + unsigned int trace_ctx) +{ + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(buffer, len); + if (event != NULL) + trace_event_setup(event, type, trace_ctx); + + return event; +} + +static __always_inline void +__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) +{ + __this_cpu_write(trace_taskinfo_save, true); + + /* If this is the temp buffer, we need to commit fully */ + if (this_cpu_read(trace_buffered_event) == event) { + /* Length is in event->array[0] */ + ring_buffer_write(buffer, event->array[0], &event->array[1]); + /* Release the temp buffer */ + this_cpu_dec(trace_buffered_event_cnt); + /* ring_buffer_unlock_commit() enables preemption */ + preempt_enable_notrace(); + } else + ring_buffer_unlock_commit(buffer); +} + static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) -- cgit v1.2.3 From 3e6c8f80e5ddd0644e509547c61366a2c09117b0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:21 -0500 Subject: tracing: Move ftrace_trace_stack() out of trace.c and into trace.h The file trace.c has become a catchall for most things tracing. Start making it smaller by breaking out various aspects into their own files. Make ftrace_trace_stack() into a static inline that tests if stack tracing is enabled and if so to call __ftrace_trace_stack() to do the stack trace. This keeps the test inlined in the fast paths and only does the function call if stack tracing is enabled. 
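This is the common inline-test/out-of-line-work split. In general form (hypothetical trace_foo()/__trace_foo() and a hypothetical FOO option flag, for illustration only):

        void __trace_foo(struct trace_array *tr, unsigned int trace_ctx);

        static __always_inline void trace_foo(struct trace_array *tr,
                                              unsigned int trace_ctx)
        {
                /* Fast path: a single flag test, no function call when disabled */
                if (!(tr->trace_flags & TRACE_ITER(FOO)))
                        return;

                __trace_foo(tr, trace_ctx);
        }

Only the flag test is duplicated at every call site; the expensive stack-walking work stays out of line in trace.c.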
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032449.974218132@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 45 ++++----------------------------------------- kernel/trace/trace.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 41 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 55cd0c774886..a515b5241391 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1032,32 +1032,6 @@ static inline void trace_access_lock_init(void) #endif -#ifdef CONFIG_STACKTRACE -static void __ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs); -static inline void ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs); - -#else -static inline void __ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs) -{ -} -static inline void ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned long trace_ctx, - int skip, struct pt_regs *regs) -{ -} - -#endif - void tracer_tracing_on(struct trace_array *tr) { if (tr->array_buffer.buffer) @@ -2964,10 +2938,10 @@ struct ftrace_stacks { static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks); static DEFINE_PER_CPU(int, ftrace_stack_reserve); -static void __ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs) +void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) { struct ring_buffer_event *event; unsigned int size, nr_entries; @@ -3050,17 +3024,6 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace_clear_recursion(bit); } -static inline void ftrace_trace_stack(struct trace_array *tr, - struct trace_buffer *buffer, - unsigned int trace_ctx, - int skip, struct pt_regs *regs) -{ - if (!(tr->trace_flags & TRACE_ITER(STACKTRACE))) - return; - - __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); -} - void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip) { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c2beabe96952..605ee23f3262 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2279,6 +2279,37 @@ static inline void sanitize_event_name(char *name) *name = '_'; } +#ifdef CONFIG_STACKTRACE +void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs); + +static __always_inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) +{ + if (!(tr->trace_flags & TRACE_ITER(STACKTRACE))) + return; + + __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs); +} +#else +static inline void __ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned int trace_ctx, + int skip, struct pt_regs *regs) +{ +} +static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, + unsigned long trace_ctx, + int skip, struct pt_regs *regs) +{ +} +#endif + /* * This is a generic way to read and write a u64 value from a file in tracefs. 
* -- cgit v1.2.3 From 1c53d781d42541adc5ba76b4f843a3ff382e01fb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:22 -0500 Subject: tracing: Make printk_trace global for tracing system The printk_trace is used to determine which trace_array trace_printk() writes to. By making it a global variable among the tracing subsystem it will allow the trace_printk functions to be moved out of trace.c and still have direct access to that variable. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032450.144525891@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a515b5241391..4a73822e2603 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -534,7 +534,7 @@ static struct trace_array global_trace = { .trace_flags = TRACE_DEFAULT_FLAGS, }; -static struct trace_array *printk_trace = &global_trace; +struct trace_array *printk_trace = &global_trace; /* List of trace_arrays interested in the top level trace_marker */ static LIST_HEAD(marker_copies); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 605ee23f3262..921e4daa2825 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -482,6 +482,8 @@ extern bool trace_clock_in_ns(struct trace_array *tr); extern unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr); +extern struct trace_array *printk_trace; + /* * The global tracer (top) should be the first trace array added, * but we check the flag anyway. -- cgit v1.2.3 From 93c88d06accdeceee4fbd243b084d3749bcd96d7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:23 -0500 Subject: tracing: Make tracing_update_buffers() take NULL for global_trace The trace.c file has become a dumping ground for all tracing code and has become quite large. In order to move the trace_printk functions out of it these functions can not access global_trace directly, as that is something that needs to stay static in trace.c. Have tracing_update_buffers() take NULL for its trace_array to denote it should work on the global_trace top level trace_array allows that function to be used outside of trace.c and still update the global_trace trace_array. 
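Callers that only care about the top level buffer can now pass NULL and never touch global_trace directly. A minimal usage sketch (illustrative):

        /* Expand the global (top level) buffers to their configured size */
        if (tracing_update_buffers(NULL) < 0)
                pr_err("Failed to expand tracing buffers\n");

        /* An instance still passes its own trace_array */
        if (tracing_update_buffers(tr) < 0)
                pr_err("Failed to expand instance buffers\n");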
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032450.318864210@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a73822e2603..601b6f622391 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3234,7 +3234,7 @@ void trace_printk_init_buffers(void) pr_warn("**********************************************************\n"); /* Expand the buffers to set size */ - if (tracing_update_buffers(&global_trace) < 0) + if (tracing_update_buffers(NULL) < 0) pr_err("Failed to expand tracing buffers for trace_printk() calls\n"); else buffers_allocated = 1; @@ -6186,6 +6186,9 @@ int tracing_update_buffers(struct trace_array *tr) { int ret = 0; + if (!tr) + tr = &global_trace; + guard(mutex)(&trace_types_lock); update_last_data(tr); -- cgit v1.2.3 From f377912b3dd71312cbf9eaf2c60263cb6e7cba59 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:24 -0500 Subject: tracing: Have trace_printk functions use flags instead of using global_trace The trace.c file has become a dumping ground for all tracing code and has become quite large. In order to move the trace_printk functions out of it these functions can not access global_trace directly, as that is something that needs to stay static in trace.c. Instead of testing the trace_array tr pointer to &global_trace, test the tr->flags to see if TRACE_ARRAY_FL_GLOBAL set. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032450.491116245@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 601b6f622391..f4ae80564615 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1071,7 +1071,8 @@ int __trace_array_puts(struct trace_array *tr, unsigned long ip, if (!(tr->trace_flags & TRACE_ITER(PRINTK))) return 0; - if (unlikely(tracing_selftest_running && tr == &global_trace)) + if (unlikely(tracing_selftest_running && + (tr->flags & TRACE_ARRAY_FL_GLOBAL))) return 0; if (unlikely(tracing_disabled)) @@ -3386,7 +3387,7 @@ out_nobuffer: int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { - if (tracing_selftest_running && tr == &global_trace) + if (tracing_selftest_running && (tr->flags & TRACE_ARRAY_FL_GLOBAL)) return 0; return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); @@ -3422,7 +3423,7 @@ int trace_array_printk(struct trace_array *tr, return -ENOENT; /* This is only allowed for created instances */ - if (tr == &global_trace) + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) return 0; if (!(tr->trace_flags & TRACE_ITER(PRINTK))) @@ -3449,7 +3450,7 @@ int trace_array_init_printk(struct trace_array *tr) return -ENOENT; /* This is only allowed for created instances */ - if (tr == &global_trace) + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) return -EINVAL; return alloc_percpu_trace_buffer(); -- cgit v1.2.3 From af1eea12ad24f62d65714c5318841894278a7aaa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:25 -0500 Subject: tracing: Use system_state in trace_printk_init_buffers() The function trace_printk_init_buffers() is used to expand tha trace_printk buffers when trace_printk() is used 
within the kernel or in modules. On kernel boot up, it holds off from starting the sched switch cmdline recorder, but will start it immediately when it is added by a module. Currently it uses a trick to see if the global_trace buffer has been allocated or not to know if it was called by module load or not. But this is more of a hack, and can not be used when this code is moved out of trace.c. Instead simply look at the system_state and if it is running then it is know that it could only be called by module load. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032450.660237094@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f4ae80564615..4066c33674e7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3243,10 +3243,9 @@ void trace_printk_init_buffers(void) /* * trace_printk_init_buffers() can be called by modules. * If that happens, then we need to start cmdline recording - * directly here. If the global_trace.buffer is already - * allocated here, then this was called by module code. + * directly here. */ - if (global_trace.array_buffer.buffer) + if (system_state == SYSTEM_RUNNING) tracing_start_cmdline_record(); } EXPORT_SYMBOL_GPL(trace_printk_init_buffers); -- cgit v1.2.3 From 27931ee8f45415db3a10586f9d5b6f77ef7d7d84 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:26 -0500 Subject: tracing: Move trace_printk functions out of trace.c and into trace_printk.c The file trace.c has become a catchall for most things tracing. Start making it smaller by breaking out various aspects into their own files. Move the functions associated to the trace_printk operations out of trace.c and into trace_printk.c. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032450.828744197@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 431 -------------------------------------------- kernel/trace/trace.h | 1 + kernel/trace/trace_printk.c | 431 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 432 insertions(+), 431 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4066c33674e7..5812b830c1fa 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -539,17 +539,6 @@ struct trace_array *printk_trace = &global_trace; /* List of trace_arrays interested in the top level trace_marker */ static LIST_HEAD(marker_copies); -static __always_inline bool printk_binsafe(struct trace_array *tr) -{ - /* - * The binary format of traceprintk can cause a crash if used - * by a buffer from another boot. Force the use of the - * non binary version of trace_printk if the trace_printk - * buffer is a boot mapped ring buffer. 
- */ - return !(tr->flags & TRACE_ARRAY_FL_BOOT); -} - static void update_printk_trace(struct trace_array *tr) { if (printk_trace == tr) @@ -1059,108 +1048,6 @@ void tracing_on(void) } EXPORT_SYMBOL_GPL(tracing_on); -int __trace_array_puts(struct trace_array *tr, unsigned long ip, - const char *str, int size) -{ - struct ring_buffer_event *event; - struct trace_buffer *buffer; - struct print_entry *entry; - unsigned int trace_ctx; - int alloc; - - if (!(tr->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - if (unlikely(tracing_selftest_running && - (tr->flags & TRACE_ARRAY_FL_GLOBAL))) - return 0; - - if (unlikely(tracing_disabled)) - return 0; - - alloc = sizeof(*entry) + size + 2; /* possible \n added */ - - trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - guard(ring_buffer_nest)(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - trace_ctx); - if (!event) - return 0; - - entry = ring_buffer_event_data(event); - entry->ip = ip; - - memcpy(&entry->buf, str, size); - - /* Add a newline if necessary */ - if (entry->buf[size - 1] != '\n') { - entry->buf[size] = '\n'; - entry->buf[size + 1] = '\0'; - } else - entry->buf[size] = '\0'; - - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - return size; -} -EXPORT_SYMBOL_GPL(__trace_array_puts); - -/** - * __trace_puts - write a constant string into the trace buffer. - * @ip: The address of the caller - * @str: The constant string to write - * @size: The size of the string. - */ -int __trace_puts(unsigned long ip, const char *str, int size) -{ - return __trace_array_puts(printk_trace, ip, str, size); -} -EXPORT_SYMBOL_GPL(__trace_puts); - -/** - * __trace_bputs - write the pointer to a constant string into trace buffer - * @ip: The address of the caller - * @str: The constant string to write to the buffer to - */ -int __trace_bputs(unsigned long ip, const char *str) -{ - struct trace_array *tr = READ_ONCE(printk_trace); - struct ring_buffer_event *event; - struct trace_buffer *buffer; - struct bputs_entry *entry; - unsigned int trace_ctx; - int size = sizeof(struct bputs_entry); - - if (!printk_binsafe(tr)) - return __trace_puts(ip, str, strlen(str)); - - if (!(tr->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - if (unlikely(tracing_selftest_running || tracing_disabled)) - return 0; - - trace_ctx = tracing_gen_ctx(); - buffer = tr->array_buffer.buffer; - - guard(ring_buffer_nest)(buffer); - event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - trace_ctx); - if (!event) - return 0; - - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->str = str; - - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); - - return 1; -} -EXPORT_SYMBOL_GPL(__trace_bputs); - #ifdef CONFIG_TRACER_SNAPSHOT static void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) @@ -3159,324 +3046,6 @@ void trace_last_func_repeats(struct trace_array *tr, __buffer_unlock_commit(buffer, event); } -/* created for use with alloc_percpu */ -struct trace_buffer_struct { - int nesting; - char buffer[4][TRACE_BUF_SIZE]; -}; - -static struct trace_buffer_struct __percpu *trace_percpu_buffer; - -/* - * This allows for lockless recording. If we're nested too deeply, then - * this returns NULL. 
- */ -static char *get_trace_buf(void) -{ - struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); - - if (!trace_percpu_buffer || buffer->nesting >= 4) - return NULL; - - buffer->nesting++; - - /* Interrupts must see nesting incremented before we use the buffer */ - barrier(); - return &buffer->buffer[buffer->nesting - 1][0]; -} - -static void put_trace_buf(void) -{ - /* Don't let the decrement of nesting leak before this */ - barrier(); - this_cpu_dec(trace_percpu_buffer->nesting); -} - -static int alloc_percpu_trace_buffer(void) -{ - struct trace_buffer_struct __percpu *buffers; - - if (trace_percpu_buffer) - return 0; - - buffers = alloc_percpu(struct trace_buffer_struct); - if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) - return -ENOMEM; - - trace_percpu_buffer = buffers; - return 0; -} - -static int buffers_allocated; - -void trace_printk_init_buffers(void) -{ - if (buffers_allocated) - return; - - if (alloc_percpu_trace_buffer()) - return; - - /* trace_printk() is for debug use only. Don't use it in production. */ - - pr_warn("\n"); - pr_warn("**********************************************************\n"); - pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); - pr_warn("** **\n"); - pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); - pr_warn("** **\n"); - pr_warn("** This means that this is a DEBUG kernel and it is **\n"); - pr_warn("** unsafe for production use. **\n"); - pr_warn("** **\n"); - pr_warn("** If you see this message and you are not debugging **\n"); - pr_warn("** the kernel, report this immediately to your vendor! **\n"); - pr_warn("** **\n"); - pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); - pr_warn("**********************************************************\n"); - - /* Expand the buffers to set size */ - if (tracing_update_buffers(NULL) < 0) - pr_err("Failed to expand tracing buffers for trace_printk() calls\n"); - else - buffers_allocated = 1; - - /* - * trace_printk_init_buffers() can be called by modules. - * If that happens, then we need to start cmdline recording - * directly here. 
- */ - if (system_state == SYSTEM_RUNNING) - tracing_start_cmdline_record(); -} -EXPORT_SYMBOL_GPL(trace_printk_init_buffers); - -void trace_printk_start_comm(void) -{ - /* Start tracing comms if trace printk is set */ - if (!buffers_allocated) - return; - tracing_start_cmdline_record(); -} - -static void trace_printk_start_stop_comm(int enabled) -{ - if (!buffers_allocated) - return; - - if (enabled) - tracing_start_cmdline_record(); - else - tracing_stop_cmdline_record(); -} - -/** - * trace_vbprintk - write binary msg to tracing buffer - * @ip: The address of the caller - * @fmt: The string format to write to the buffer - * @args: Arguments for @fmt - */ -int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) -{ - struct ring_buffer_event *event; - struct trace_buffer *buffer; - struct trace_array *tr = READ_ONCE(printk_trace); - struct bprint_entry *entry; - unsigned int trace_ctx; - char *tbuffer; - int len = 0, size; - - if (!printk_binsafe(tr)) - return trace_vprintk(ip, fmt, args); - - if (unlikely(tracing_selftest_running || tracing_disabled)) - return 0; - - /* Don't pollute graph traces with trace_vprintk internals */ - pause_graph_tracing(); - - trace_ctx = tracing_gen_ctx(); - guard(preempt_notrace)(); - - tbuffer = get_trace_buf(); - if (!tbuffer) { - len = 0; - goto out_nobuffer; - } - - len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); - - if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) - goto out_put; - - size = sizeof(*entry) + sizeof(u32) * len; - buffer = tr->array_buffer.buffer; - scoped_guard(ring_buffer_nest, buffer) { - event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, - trace_ctx); - if (!event) - goto out_put; - entry = ring_buffer_event_data(event); - entry->ip = ip; - entry->fmt = fmt; - - memcpy(entry->buf, tbuffer, sizeof(u32) * len); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); - } -out_put: - put_trace_buf(); - -out_nobuffer: - unpause_graph_tracing(); - - return len; -} -EXPORT_SYMBOL_GPL(trace_vbprintk); - -static __printf(3, 0) -int __trace_array_vprintk(struct trace_buffer *buffer, - unsigned long ip, const char *fmt, va_list args) -{ - struct ring_buffer_event *event; - int len = 0, size; - struct print_entry *entry; - unsigned int trace_ctx; - char *tbuffer; - - if (unlikely(tracing_disabled)) - return 0; - - /* Don't pollute graph traces with trace_vprintk internals */ - pause_graph_tracing(); - - trace_ctx = tracing_gen_ctx(); - guard(preempt_notrace)(); - - - tbuffer = get_trace_buf(); - if (!tbuffer) { - len = 0; - goto out_nobuffer; - } - - len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); - - size = sizeof(*entry) + len + 1; - scoped_guard(ring_buffer_nest, buffer) { - event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - trace_ctx); - if (!event) - goto out; - entry = ring_buffer_event_data(event); - entry->ip = ip; - - memcpy(&entry->buf, tbuffer, len + 1); - __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); - } -out: - put_trace_buf(); - -out_nobuffer: - unpause_graph_tracing(); - - return len; -} - -int trace_array_vprintk(struct trace_array *tr, - unsigned long ip, const char *fmt, va_list args) -{ - if (tracing_selftest_running && (tr->flags & TRACE_ARRAY_FL_GLOBAL)) - return 0; - - return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); -} - -/** - * trace_array_printk - Print a message to a specific instance - * @tr: The instance trace_array descriptor - * 
@ip: The instruction pointer that this is called from. - * @fmt: The format to print (printf format) - * - * If a subsystem sets up its own instance, they have the right to - * printk strings into their tracing instance buffer using this - * function. Note, this function will not write into the top level - * buffer (use trace_printk() for that), as writing into the top level - * buffer should only have events that can be individually disabled. - * trace_printk() is only used for debugging a kernel, and should not - * be ever incorporated in normal use. - * - * trace_array_printk() can be used, as it will not add noise to the - * top level tracing buffer. - * - * Note, trace_array_init_printk() must be called on @tr before this - * can be used. - */ -int trace_array_printk(struct trace_array *tr, - unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!tr) - return -ENOENT; - - /* This is only allowed for created instances */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return 0; - - if (!(tr->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - va_start(ap, fmt); - ret = trace_array_vprintk(tr, ip, fmt, ap); - va_end(ap); - return ret; -} -EXPORT_SYMBOL_GPL(trace_array_printk); - -/** - * trace_array_init_printk - Initialize buffers for trace_array_printk() - * @tr: The trace array to initialize the buffers for - * - * As trace_array_printk() only writes into instances, they are OK to - * have in the kernel (unlike trace_printk()). This needs to be called - * before trace_array_printk() can be used on a trace_array. - */ -int trace_array_init_printk(struct trace_array *tr) -{ - if (!tr) - return -ENOENT; - - /* This is only allowed for created instances */ - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - return -EINVAL; - - return alloc_percpu_trace_buffer(); -} -EXPORT_SYMBOL_GPL(trace_array_init_printk); - -int trace_array_printk_buf(struct trace_buffer *buffer, - unsigned long ip, const char *fmt, ...) -{ - int ret; - va_list ap; - - if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK))) - return 0; - - va_start(ap, fmt); - ret = __trace_array_vprintk(buffer, ip, fmt, ap); - va_end(ap); - return ret; -} - -int trace_vprintk(unsigned long ip, const char *fmt, va_list args) -{ - return trace_array_vprintk(printk_trace, ip, fmt, args); -} -EXPORT_SYMBOL_GPL(trace_vprintk); - static void trace_iterator_increment(struct trace_iterator *iter) { struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 921e4daa2825..6b0fedf2f532 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2131,6 +2131,7 @@ extern const char *__stop___tracepoint_str[]; void trace_printk_control(bool enabled); void trace_printk_start_comm(void); +void trace_printk_start_stop_comm(int enabled); int trace_keep_overwrite(struct tracer *tracer, u64 mask, int set); int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled); diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 29f6e95439b6..c9cb74a33b3c 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -376,6 +376,437 @@ static const struct file_operations ftrace_formats_fops = { .release = seq_release, }; +static __always_inline bool printk_binsafe(struct trace_array *tr) +{ + /* + * The binary format of traceprintk can cause a crash if used + * by a buffer from another boot. Force the use of the + * non binary version of trace_printk if the trace_printk + * buffer is a boot mapped ring buffer. 
+ */ + return !(tr->flags & TRACE_ARRAY_FL_BOOT); +} + +int __trace_array_puts(struct trace_array *tr, unsigned long ip, + const char *str, int size) +{ + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct print_entry *entry; + unsigned int trace_ctx; + int alloc; + + if (!(tr->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + if (unlikely(tracing_selftest_running && + (tr->flags & TRACE_ARRAY_FL_GLOBAL))) + return 0; + + if (unlikely(tracing_disabled)) + return 0; + + alloc = sizeof(*entry) + size + 2; /* possible \n added */ + + trace_ctx = tracing_gen_ctx(); + buffer = tr->array_buffer.buffer; + guard(ring_buffer_nest)(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, + trace_ctx); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->ip = ip; + + memcpy(&entry->buf, str, size); + + /* Add a newline if necessary */ + if (entry->buf[size - 1] != '\n') { + entry->buf[size] = '\n'; + entry->buf[size + 1] = '\0'; + } else + entry->buf[size] = '\0'; + + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); + return size; +} +EXPORT_SYMBOL_GPL(__trace_array_puts); + +/** + * __trace_puts - write a constant string into the trace buffer. + * @ip: The address of the caller + * @str: The constant string to write + * @size: The size of the string. + */ +int __trace_puts(unsigned long ip, const char *str, int size) +{ + return __trace_array_puts(printk_trace, ip, str, size); +} +EXPORT_SYMBOL_GPL(__trace_puts); + +/** + * __trace_bputs - write the pointer to a constant string into trace buffer + * @ip: The address of the caller + * @str: The constant string to write to the buffer to + */ +int __trace_bputs(unsigned long ip, const char *str) +{ + struct trace_array *tr = READ_ONCE(printk_trace); + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct bputs_entry *entry; + unsigned int trace_ctx; + int size = sizeof(struct bputs_entry); + + if (!printk_binsafe(tr)) + return __trace_puts(ip, str, strlen(str)); + + if (!(tr->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + trace_ctx = tracing_gen_ctx(); + buffer = tr->array_buffer.buffer; + + guard(ring_buffer_nest)(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, + trace_ctx); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->str = str; + + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); + + return 1; +} +EXPORT_SYMBOL_GPL(__trace_bputs); + +/* created for use with alloc_percpu */ +struct trace_buffer_struct { + int nesting; + char buffer[4][TRACE_BUF_SIZE]; +}; + +static struct trace_buffer_struct __percpu *trace_percpu_buffer; + +/* + * This allows for lockless recording. If we're nested too deeply, then + * this returns NULL. 
+ */ +static char *get_trace_buf(void) +{ + struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); + + if (!trace_percpu_buffer || buffer->nesting >= 4) + return NULL; + + buffer->nesting++; + + /* Interrupts must see nesting incremented before we use the buffer */ + barrier(); + return &buffer->buffer[buffer->nesting - 1][0]; +} + +static void put_trace_buf(void) +{ + /* Don't let the decrement of nesting leak before this */ + barrier(); + this_cpu_dec(trace_percpu_buffer->nesting); +} + +static int alloc_percpu_trace_buffer(void) +{ + struct trace_buffer_struct __percpu *buffers; + + if (trace_percpu_buffer) + return 0; + + buffers = alloc_percpu(struct trace_buffer_struct); + if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer")) + return -ENOMEM; + + trace_percpu_buffer = buffers; + return 0; +} + +static int buffers_allocated; + +void trace_printk_init_buffers(void) +{ + if (buffers_allocated) + return; + + if (alloc_percpu_trace_buffer()) + return; + + /* trace_printk() is for debug use only. Don't use it in production. */ + + pr_warn("\n"); + pr_warn("**********************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** trace_printk() being used. Allocating extra memory. **\n"); + pr_warn("** **\n"); + pr_warn("** This means that this is a DEBUG kernel and it is **\n"); + pr_warn("** unsafe for production use. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging **\n"); + pr_warn("** the kernel, report this immediately to your vendor! **\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("**********************************************************\n"); + + /* Expand the buffers to set size */ + if (tracing_update_buffers(NULL) < 0) + pr_err("Failed to expand tracing buffers for trace_printk() calls\n"); + else + buffers_allocated = 1; + + /* + * trace_printk_init_buffers() can be called by modules. + * If that happens, then we need to start cmdline recording + * directly here. 
+ */ + if (system_state == SYSTEM_RUNNING) + tracing_start_cmdline_record(); +} +EXPORT_SYMBOL_GPL(trace_printk_init_buffers); + +void trace_printk_start_comm(void) +{ + /* Start tracing comms if trace printk is set */ + if (!buffers_allocated) + return; + tracing_start_cmdline_record(); +} + +void trace_printk_start_stop_comm(int enabled) +{ + if (!buffers_allocated) + return; + + if (enabled) + tracing_start_cmdline_record(); + else + tracing_stop_cmdline_record(); +} + +/** + * trace_vbprintk - write binary msg to tracing buffer + * @ip: The address of the caller + * @fmt: The string format to write to the buffer + * @args: Arguments for @fmt + */ +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) +{ + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct trace_array *tr = READ_ONCE(printk_trace); + struct bprint_entry *entry; + unsigned int trace_ctx; + char *tbuffer; + int len = 0, size; + + if (!printk_binsafe(tr)) + return trace_vprintk(ip, fmt, args); + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + + trace_ctx = tracing_gen_ctx(); + guard(preempt_notrace)(); + + tbuffer = get_trace_buf(); + if (!tbuffer) { + len = 0; + goto out_nobuffer; + } + + len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); + + if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) + goto out_put; + + size = sizeof(*entry) + sizeof(u32) * len; + buffer = tr->array_buffer.buffer; + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, + trace_ctx); + if (!event) + goto out_put; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->fmt = fmt; + + memcpy(entry->buf, tbuffer, sizeof(u32) * len); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + } +out_put: + put_trace_buf(); + +out_nobuffer: + unpause_graph_tracing(); + + return len; +} +EXPORT_SYMBOL_GPL(trace_vbprintk); + +static __printf(3, 0) +int __trace_array_vprintk(struct trace_buffer *buffer, + unsigned long ip, const char *fmt, va_list args) +{ + struct ring_buffer_event *event; + int len = 0, size; + struct print_entry *entry; + unsigned int trace_ctx; + char *tbuffer; + + if (unlikely(tracing_disabled)) + return 0; + + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + + trace_ctx = tracing_gen_ctx(); + guard(preempt_notrace)(); + + + tbuffer = get_trace_buf(); + if (!tbuffer) { + len = 0; + goto out_nobuffer; + } + + len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); + + size = sizeof(*entry) + len + 1; + scoped_guard(ring_buffer_nest, buffer) { + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + entry->ip = ip; + + memcpy(&entry->buf, tbuffer, len + 1); + __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL); + } +out: + put_trace_buf(); + +out_nobuffer: + unpause_graph_tracing(); + + return len; +} + +int trace_array_vprintk(struct trace_array *tr, + unsigned long ip, const char *fmt, va_list args) +{ + if (tracing_selftest_running && (tr->flags & TRACE_ARRAY_FL_GLOBAL)) + return 0; + + return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args); +} + +/** + * trace_array_printk - Print a message to a specific instance + * @tr: The instance trace_array descriptor + * @ip: The 
instruction pointer that this is called from. + * @fmt: The format to print (printf format) + * + * If a subsystem sets up its own instance, they have the right to + * printk strings into their tracing instance buffer using this + * function. Note, this function will not write into the top level + * buffer (use trace_printk() for that), as writing into the top level + * buffer should only have events that can be individually disabled. + * trace_printk() is only used for debugging a kernel, and should not + * be ever incorporated in normal use. + * + * trace_array_printk() can be used, as it will not add noise to the + * top level tracing buffer. + * + * Note, trace_array_init_printk() must be called on @tr before this + * can be used. + */ +int trace_array_printk(struct trace_array *tr, + unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!tr) + return -ENOENT; + + /* This is only allowed for created instances */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return 0; + + if (!(tr->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + va_start(ap, fmt); + ret = trace_array_vprintk(tr, ip, fmt, ap); + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(trace_array_printk); + +/** + * trace_array_init_printk - Initialize buffers for trace_array_printk() + * @tr: The trace array to initialize the buffers for + * + * As trace_array_printk() only writes into instances, they are OK to + * have in the kernel (unlike trace_printk()). This needs to be called + * before trace_array_printk() can be used on a trace_array. + */ +int trace_array_init_printk(struct trace_array *tr) +{ + if (!tr) + return -ENOENT; + + /* This is only allowed for created instances */ + if (tr->flags & TRACE_ARRAY_FL_GLOBAL) + return -EINVAL; + + return alloc_percpu_trace_buffer(); +} +EXPORT_SYMBOL_GPL(trace_array_init_printk); + +int trace_array_printk_buf(struct trace_buffer *buffer, + unsigned long ip, const char *fmt, ...) +{ + int ret; + va_list ap; + + if (!(printk_trace->trace_flags & TRACE_ITER(PRINTK))) + return 0; + + va_start(ap, fmt); + ret = __trace_array_vprintk(buffer, ip, fmt, ap); + va_end(ap); + return ret; +} + +int trace_vprintk(unsigned long ip, const char *fmt, va_list args) +{ + return trace_array_vprintk(printk_trace, ip, fmt, args); +} +EXPORT_SYMBOL_GPL(trace_vprintk); + static __init int init_trace_printk_function_export(void) { int ret; -- cgit v1.2.3 From 98021e37d694ddc48f45b690045df013054fd69c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 7 Feb 2026 22:24:27 -0500 Subject: tracing: Move pid filtering into trace_pid.c The trace.c file was a dumping ground for most tracing code. Start organizing it better by moving various functions out into their own files. Move the PID filtering functions from trace.c into its own trace_pid.c file. 
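The helpers keep their semantics across the move. A typical caller shape (illustrative sketch, not a specific call site; assumes the instance's filtered_pids / filtered_no_pids lists) checks both pid lists before recording an event for a task:

        /* Sketch: should events be recorded for "task" in this instance? */
        static bool foo_should_trace(struct trace_array *tr,
                                     struct task_struct *task)
        {
                struct trace_pid_list *pids = rcu_dereference_sched(tr->filtered_pids);
                struct trace_pid_list *no_pids = rcu_dereference_sched(tr->filtered_no_pids);

                /* true from trace_ignore_this_task() means "do not trace" */
                return !trace_ignore_this_task(pids, no_pids, task);
        }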
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208032450.998330662@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/Makefile | 1 + kernel/trace/trace.c | 242 ---------------------------------------------- kernel/trace/trace_pid.c | 246 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 247 insertions(+), 242 deletions(-) create mode 100644 kernel/trace/trace_pid.c (limited to 'kernel/trace') diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index fc5dcc888e13..04096c21d06b 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o +obj-$(CONFIG_TRACING) += trace_pid.o obj-$(CONFIG_TRACING) += pid_list.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5812b830c1fa..551a452befa0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -637,248 +637,6 @@ int tracing_check_open_get_tr(struct trace_array *tr) return 0; } -/** - * trace_find_filtered_pid - check if a pid exists in a filtered_pid list - * @filtered_pids: The list of pids to check - * @search_pid: The PID to find in @filtered_pids - * - * Returns true if @search_pid is found in @filtered_pids, and false otherwise. - */ -bool -trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) -{ - return trace_pid_list_is_set(filtered_pids, search_pid); -} - -/** - * trace_ignore_this_task - should a task be ignored for tracing - * @filtered_pids: The list of pids to check - * @filtered_no_pids: The list of pids not to be traced - * @task: The task that should be ignored if not filtered - * - * Checks if @task should be traced or not from @filtered_pids. - * Returns true if @task should *NOT* be traced. - * Returns false if @task should be traced. - */ -bool -trace_ignore_this_task(struct trace_pid_list *filtered_pids, - struct trace_pid_list *filtered_no_pids, - struct task_struct *task) -{ - /* - * If filtered_no_pids is not empty, and the task's pid is listed - * in filtered_no_pids, then return true. - * Otherwise, if filtered_pids is empty, that means we can - * trace all tasks. If it has content, then only trace pids - * within filtered_pids. - */ - - return (filtered_pids && - !trace_find_filtered_pid(filtered_pids, task->pid)) || - (filtered_no_pids && - trace_find_filtered_pid(filtered_no_pids, task->pid)); -} - -/** - * trace_filter_add_remove_task - Add or remove a task from a pid_list - * @pid_list: The list to modify - * @self: The current task for fork or NULL for exit - * @task: The task to add or remove - * - * If adding a task, if @self is defined, the task is only added if @self - * is also included in @pid_list. This happens on fork and tasks should - * only be added when the parent is listed. If @self is NULL, then the - * @task pid will be removed from the list, which would happen on exit - * of a task. 
- */ -void trace_filter_add_remove_task(struct trace_pid_list *pid_list, - struct task_struct *self, - struct task_struct *task) -{ - if (!pid_list) - return; - - /* For forks, we only add if the forking task is listed */ - if (self) { - if (!trace_find_filtered_pid(pid_list, self->pid)) - return; - } - - /* "self" is set for forks, and NULL for exits */ - if (self) - trace_pid_list_set(pid_list, task->pid); - else - trace_pid_list_clear(pid_list, task->pid); -} - -/** - * trace_pid_next - Used for seq_file to get to the next pid of a pid_list - * @pid_list: The pid list to show - * @v: The last pid that was shown (+1 the actual pid to let zero be displayed) - * @pos: The position of the file - * - * This is used by the seq_file "next" operation to iterate the pids - * listed in a trace_pid_list structure. - * - * Returns the pid+1 as we want to display pid of zero, but NULL would - * stop the iteration. - */ -void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) -{ - long pid = (unsigned long)v; - unsigned int next; - - (*pos)++; - - /* pid already is +1 of the actual previous bit */ - if (trace_pid_list_next(pid_list, pid, &next) < 0) - return NULL; - - pid = next; - - /* Return pid + 1 to allow zero to be represented */ - return (void *)(pid + 1); -} - -/** - * trace_pid_start - Used for seq_file to start reading pid lists - * @pid_list: The pid list to show - * @pos: The position of the file - * - * This is used by seq_file "start" operation to start the iteration - * of listing pids. - * - * Returns the pid+1 as we want to display pid of zero, but NULL would - * stop the iteration. - */ -void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) -{ - unsigned long pid; - unsigned int first; - loff_t l = 0; - - if (trace_pid_list_first(pid_list, &first) < 0) - return NULL; - - pid = first; - - /* Return pid + 1 so that zero can be the exit value */ - for (pid++; pid && l < *pos; - pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) - ; - return (void *)pid; -} - -/** - * trace_pid_show - show the current pid in seq_file processing - * @m: The seq_file structure to write into - * @v: A void pointer of the pid (+1) value to display - * - * Can be directly used by seq_file operations to display the current - * pid value. - */ -int trace_pid_show(struct seq_file *m, void *v) -{ - unsigned long pid = (unsigned long)v - 1; - - seq_printf(m, "%lu\n", pid); - return 0; -} - -/* 128 should be much more than enough */ -#define PID_BUF_SIZE 127 - -int trace_pid_write(struct trace_pid_list *filtered_pids, - struct trace_pid_list **new_pid_list, - const char __user *ubuf, size_t cnt) -{ - struct trace_pid_list *pid_list; - struct trace_parser parser; - unsigned long val; - int nr_pids = 0; - ssize_t read = 0; - ssize_t ret; - loff_t pos; - pid_t pid; - - if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) - return -ENOMEM; - - /* - * Always recreate a new array. The write is an all or nothing - * operation. Always create a new array when adding new pids by - * the user. If the operation fails, then the current list is - * not modified. 
- */ - pid_list = trace_pid_list_alloc(); - if (!pid_list) { - trace_parser_put(&parser); - return -ENOMEM; - } - - if (filtered_pids) { - /* copy the current bits to the new max */ - ret = trace_pid_list_first(filtered_pids, &pid); - while (!ret) { - ret = trace_pid_list_set(pid_list, pid); - if (ret < 0) - goto out; - - ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); - nr_pids++; - } - } - - ret = 0; - while (cnt > 0) { - - pos = 0; - - ret = trace_get_user(&parser, ubuf, cnt, &pos); - if (ret < 0) - break; - - read += ret; - ubuf += ret; - cnt -= ret; - - if (!trace_parser_loaded(&parser)) - break; - - ret = -EINVAL; - if (kstrtoul(parser.buffer, 0, &val)) - break; - - pid = (pid_t)val; - - if (trace_pid_list_set(pid_list, pid) < 0) { - ret = -1; - break; - } - nr_pids++; - - trace_parser_clear(&parser); - ret = 0; - } - out: - trace_parser_put(&parser); - - if (ret < 0) { - trace_pid_list_free(pid_list); - return ret; - } - - if (!nr_pids) { - /* Cleared the list of pids */ - trace_pid_list_free(pid_list); - pid_list = NULL; - } - - *new_pid_list = pid_list; - - return read; -} - static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu) { u64 ts; diff --git a/kernel/trace/trace_pid.c b/kernel/trace/trace_pid.c new file mode 100644 index 000000000000..7127c8de4174 --- /dev/null +++ b/kernel/trace/trace_pid.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "trace.h" + +/** + * trace_find_filtered_pid - check if a pid exists in a filtered_pid list + * @filtered_pids: The list of pids to check + * @search_pid: The PID to find in @filtered_pids + * + * Returns true if @search_pid is found in @filtered_pids, and false otherwise. + */ +bool +trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) +{ + return trace_pid_list_is_set(filtered_pids, search_pid); +} + +/** + * trace_ignore_this_task - should a task be ignored for tracing + * @filtered_pids: The list of pids to check + * @filtered_no_pids: The list of pids not to be traced + * @task: The task that should be ignored if not filtered + * + * Checks if @task should be traced or not from @filtered_pids. + * Returns true if @task should *NOT* be traced. + * Returns false if @task should be traced. + */ +bool +trace_ignore_this_task(struct trace_pid_list *filtered_pids, + struct trace_pid_list *filtered_no_pids, + struct task_struct *task) +{ + /* + * If filtered_no_pids is not empty, and the task's pid is listed + * in filtered_no_pids, then return true. + * Otherwise, if filtered_pids is empty, that means we can + * trace all tasks. If it has content, then only trace pids + * within filtered_pids. + */ + + return (filtered_pids && + !trace_find_filtered_pid(filtered_pids, task->pid)) || + (filtered_no_pids && + trace_find_filtered_pid(filtered_no_pids, task->pid)); +} + +/** + * trace_filter_add_remove_task - Add or remove a task from a pid_list + * @pid_list: The list to modify + * @self: The current task for fork or NULL for exit + * @task: The task to add or remove + * + * If adding a task, if @self is defined, the task is only added if @self + * is also included in @pid_list. This happens on fork and tasks should + * only be added when the parent is listed. If @self is NULL, then the + * @task pid will be removed from the list, which would happen on exit + * of a task. 
+ */ +void trace_filter_add_remove_task(struct trace_pid_list *pid_list, + struct task_struct *self, + struct task_struct *task) +{ + if (!pid_list) + return; + + /* For forks, we only add if the forking task is listed */ + if (self) { + if (!trace_find_filtered_pid(pid_list, self->pid)) + return; + } + + /* "self" is set for forks, and NULL for exits */ + if (self) + trace_pid_list_set(pid_list, task->pid); + else + trace_pid_list_clear(pid_list, task->pid); +} + +/** + * trace_pid_next - Used for seq_file to get to the next pid of a pid_list + * @pid_list: The pid list to show + * @v: The last pid that was shown (+1 the actual pid to let zero be displayed) + * @pos: The position of the file + * + * This is used by the seq_file "next" operation to iterate the pids + * listed in a trace_pid_list structure. + * + * Returns the pid+1 as we want to display pid of zero, but NULL would + * stop the iteration. + */ +void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) +{ + long pid = (unsigned long)v; + unsigned int next; + + (*pos)++; + + /* pid already is +1 of the actual previous bit */ + if (trace_pid_list_next(pid_list, pid, &next) < 0) + return NULL; + + pid = next; + + /* Return pid + 1 to allow zero to be represented */ + return (void *)(pid + 1); +} + +/** + * trace_pid_start - Used for seq_file to start reading pid lists + * @pid_list: The pid list to show + * @pos: The position of the file + * + * This is used by seq_file "start" operation to start the iteration + * of listing pids. + * + * Returns the pid+1 as we want to display pid of zero, but NULL would + * stop the iteration. + */ +void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) +{ + unsigned long pid; + unsigned int first; + loff_t l = 0; + + if (trace_pid_list_first(pid_list, &first) < 0) + return NULL; + + pid = first; + + /* Return pid + 1 so that zero can be the exit value */ + for (pid++; pid && l < *pos; + pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) + ; + return (void *)pid; +} + +/** + * trace_pid_show - show the current pid in seq_file processing + * @m: The seq_file structure to write into + * @v: A void pointer of the pid (+1) value to display + * + * Can be directly used by seq_file operations to display the current + * pid value. + */ +int trace_pid_show(struct seq_file *m, void *v) +{ + unsigned long pid = (unsigned long)v - 1; + + seq_printf(m, "%lu\n", pid); + return 0; +} + +/* 128 should be much more than enough */ +#define PID_BUF_SIZE 127 + +int trace_pid_write(struct trace_pid_list *filtered_pids, + struct trace_pid_list **new_pid_list, + const char __user *ubuf, size_t cnt) +{ + struct trace_pid_list *pid_list; + struct trace_parser parser; + unsigned long val; + int nr_pids = 0; + ssize_t read = 0; + ssize_t ret; + loff_t pos; + pid_t pid; + + if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) + return -ENOMEM; + + /* + * Always recreate a new array. The write is an all or nothing + * operation. Always create a new array when adding new pids by + * the user. If the operation fails, then the current list is + * not modified. 
+ */ + pid_list = trace_pid_list_alloc(); + if (!pid_list) { + trace_parser_put(&parser); + return -ENOMEM; + } + + if (filtered_pids) { + /* copy the current bits to the new max */ + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { + ret = trace_pid_list_set(pid_list, pid); + if (ret < 0) + goto out; + + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); + nr_pids++; + } + } + + ret = 0; + while (cnt > 0) { + + pos = 0; + + ret = trace_get_user(&parser, ubuf, cnt, &pos); + if (ret < 0) + break; + + read += ret; + ubuf += ret; + cnt -= ret; + + if (!trace_parser_loaded(&parser)) + break; + + ret = -EINVAL; + if (kstrtoul(parser.buffer, 0, &val)) + break; + + pid = (pid_t)val; + + if (trace_pid_list_set(pid_list, pid) < 0) { + ret = -1; + break; + } + nr_pids++; + + trace_parser_clear(&parser); + ret = 0; + } + out: + trace_parser_put(&parser); + + if (ret < 0) { + trace_pid_list_free(pid_list); + return ret; + } + + if (!nr_pids) { + /* Cleared the list of pids */ + trace_pid_list_free(pid_list); + pid_list = NULL; + } + + *new_pid_list = pid_list; + + return read; +} + -- cgit v1.2.3 From 694b3f6fe0b6c86ff75e94302708f5a718027297 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 8 Feb 2026 13:38:33 -0500 Subject: tracing: Rename trace_array field max_buffer to snapshot_buffer When tracing was first added, there were latency tracers that would take a snapshot of the current trace when a new max latency was hit. This snapshot buffer was called "max_buffer". Since then, a snapshot feature was added that allowed user space or event triggers to trigger a snapshot of the current buffer using the same max_buffer of the trace_array. As this snapshot buffer now has a more generic use case, calling it "max_buffer" is confusing. Rename it to snapshot_buffer. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208183856.428446729@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 72 +++++++++++++++++++++---------------------- kernel/trace/trace.h | 13 ++++---- kernel/trace/trace_selftest.c | 10 +++--- 3 files changed, 48 insertions(+), 47 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 551a452befa0..98524d0656bf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -934,12 +934,12 @@ int tracing_alloc_snapshot_instance(struct trace_array *tr) /* Make the snapshot buffer have the same order as main buffer */ order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer); - ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order); if (ret < 0) return ret; /* allocate spare buffer */ - ret = resize_buffer_duplicate_size(&tr->max_buffer, + ret = resize_buffer_duplicate_size(&tr->snapshot_buffer, &tr->array_buffer, RING_BUFFER_ALL_CPUS); if (ret < 0) return ret; @@ -957,10 +957,10 @@ static void free_snapshot(struct trace_array *tr) * The max_tr ring buffer has some state (e.g. ring->clock) and * we want preserve it. 
*/ - ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0); - ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); - set_buffer_entries(&tr->max_buffer, 1); - tracing_reset_online_cpus(&tr->max_buffer); + ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, 0); + ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); + set_buffer_entries(&tr->snapshot_buffer, 1); + tracing_reset_online_cpus(&tr->snapshot_buffer); tr->allocated_snapshot = false; } @@ -1556,7 +1556,7 @@ static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct array_buffer *trace_buf = &tr->array_buffer; - struct array_buffer *max_buf = &tr->max_buffer; + struct array_buffer *max_buf = &tr->snapshot_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); @@ -1616,9 +1616,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, /* Inherit the recordable setting from array_buffer */ if (ring_buffer_record_is_set_on(tr->array_buffer.buffer)) - ring_buffer_record_on(tr->max_buffer.buffer); + ring_buffer_record_on(tr->snapshot_buffer.buffer); else - ring_buffer_record_off(tr->max_buffer.buffer); + ring_buffer_record_off(tr->snapshot_buffer.buffer); #ifdef CONFIG_TRACER_SNAPSHOT if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { @@ -1626,7 +1626,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, return; } #endif - swap(tr->array_buffer.buffer, tr->max_buffer.buffer); + swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer); __update_max_tr(tr, tsk, cpu); @@ -1661,7 +1661,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); - ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu); + ret = ring_buffer_swap_cpu(tr->snapshot_buffer.buffer, tr->array_buffer.buffer, cpu); if (ret == -EBUSY) { /* @@ -1671,7 +1671,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) * and flag that it failed. * Another reason is resize is in progress. */ - trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, + trace_array_printk_buf(tr->snapshot_buffer.buffer, _THIS_IP_, "Failed to swap buffers due to commit or resize in progress\n"); } @@ -1722,7 +1722,7 @@ static int wait_on_pipe(struct trace_iterator *iter, int full) * to happen, this would now be the main buffer. 
*/ if (iter->snapshot) - iter->array_buffer = &iter->tr->max_buffer; + iter->array_buffer = &iter->tr->snapshot_buffer; #endif return ret; } @@ -1790,7 +1790,7 @@ static int run_tracer_selftest(struct tracer *type) if (type->use_max_tr) { /* If we expanded the buffers, make sure the max is expanded too */ if (tr->ring_buffer_expanded) - ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, + ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size, RING_BUFFER_ALL_CPUS); tr->allocated_snapshot = true; } @@ -1817,7 +1817,7 @@ static int run_tracer_selftest(struct tracer *type) /* Shrink the max buffer again */ if (tr->ring_buffer_expanded) - ring_buffer_resize(tr->max_buffer.buffer, 1, + ring_buffer_resize(tr->snapshot_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); } #endif @@ -2060,7 +2060,7 @@ void tracing_reset_all_online_cpus_unlocked(void) tr->clear_trace = false; tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - tracing_reset_online_cpus(&tr->max_buffer); + tracing_reset_online_cpus(&tr->snapshot_buffer); #endif } } @@ -2100,7 +2100,7 @@ static void tracing_start_tr(struct trace_array *tr) ring_buffer_record_enable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = tr->max_buffer.buffer; + buffer = tr->snapshot_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); #endif @@ -2136,7 +2136,7 @@ static void tracing_stop_tr(struct trace_array *tr) ring_buffer_record_disable(buffer); #ifdef CONFIG_TRACER_MAX_TRACE - buffer = tr->max_buffer.buffer; + buffer = tr->snapshot_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); #endif @@ -3943,7 +3943,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) #ifdef CONFIG_TRACER_MAX_TRACE /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) - iter->array_buffer = &tr->max_buffer; + iter->array_buffer = &tr->snapshot_buffer; else #endif iter->array_buffer = &tr->array_buffer; @@ -4146,7 +4146,7 @@ static int tracing_open(struct inode *inode, struct file *file) #ifdef CONFIG_TRACER_MAX_TRACE if (tr->current_trace->print_max) - trace_buf = &tr->max_buffer; + trace_buf = &tr->snapshot_buffer; #endif if (cpu == RING_BUFFER_ALL_CPUS) @@ -4359,14 +4359,14 @@ int tracing_set_cpumask(struct trace_array *tr, !cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); #ifdef CONFIG_TRACER_MAX_TRACE - ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu); + ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu); #endif } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); #ifdef CONFIG_TRACER_MAX_TRACE - ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu); + ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu); #endif } } @@ -4576,7 +4576,7 @@ int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) case TRACE_ITER(OVERWRITE): ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); #ifdef CONFIG_TRACER_MAX_TRACE - ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled); + ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled); #endif break; @@ -5294,7 +5294,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (!tr->allocated_snapshot) goto out; - ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu); + ret = ring_buffer_resize(tr->snapshot_buffer.buffer, size, cpu); if (ret < 0) { int r = 
resize_buffer_duplicate_size(&tr->array_buffer, &tr->array_buffer, cpu); @@ -5319,7 +5319,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, goto out_start; } - update_buffer_entries(&tr->max_buffer, cpu); + update_buffer_entries(&tr->snapshot_buffer, cpu); out: #endif /* CONFIG_TRACER_MAX_TRACE */ @@ -7036,9 +7036,9 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr) tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (tr->max_buffer.buffer) - ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func); - tracing_reset_online_cpus(&tr->max_buffer); + if (tr->snapshot_buffer.buffer) + ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func); + tracing_reset_online_cpus(&tr->snapshot_buffer); #endif if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) { @@ -7170,7 +7170,7 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file) ret = 0; iter->tr = tr; - iter->array_buffer = &tr->max_buffer; + iter->array_buffer = &tr->snapshot_buffer; iter->cpu_file = tracing_get_cpu(inode); m->private = iter; file->private_data = m; @@ -7233,7 +7233,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, return -EINVAL; #endif if (tr->allocated_snapshot) - ret = resize_buffer_duplicate_size(&tr->max_buffer, + ret = resize_buffer_duplicate_size(&tr->snapshot_buffer, &tr->array_buffer, iter->cpu_file); ret = tracing_arm_snapshot_locked(tr); @@ -7254,9 +7254,9 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, default: if (tr->allocated_snapshot) { if (iter->cpu_file == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(&tr->max_buffer); + tracing_reset_online_cpus(&tr->snapshot_buffer); else - tracing_reset_cpu(&tr->max_buffer, iter->cpu_file); + tracing_reset_cpu(&tr->snapshot_buffer, iter->cpu_file); } break; } @@ -7312,7 +7312,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) } info->iter.snapshot = true; - info->iter.array_buffer = &info->iter.tr->max_buffer; + info->iter.array_buffer = &info->iter.tr->snapshot_buffer; return ret; } @@ -9195,7 +9195,7 @@ buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, if (!tr->allocated_snapshot) goto out_max; - ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order); + ret = ring_buffer_subbuf_order_set(tr->snapshot_buffer.buffer, order); if (ret) { /* Put back the old order */ cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order); @@ -9416,7 +9416,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) if (tr->range_addr_start) return 0; - ret = allocate_trace_buffer(tr, &tr->max_buffer, + ret = allocate_trace_buffer(tr, &tr->snapshot_buffer, allocate_snapshot ? 
size : 1); if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { free_trace_buffer(&tr->array_buffer); @@ -9439,7 +9439,7 @@ static void free_trace_buffers(struct trace_array *tr) kfree(tr->module_delta); #ifdef CONFIG_TRACER_MAX_TRACE - free_trace_buffer(&tr->max_buffer); + free_trace_buffer(&tr->snapshot_buffer); #endif } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6b0fedf2f532..b50383aa8e50 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -331,17 +331,18 @@ struct trace_array { struct array_buffer array_buffer; #ifdef CONFIG_TRACER_MAX_TRACE /* - * The max_buffer is used to snapshot the trace when a maximum + * The snapshot_buffer is used to snapshot the trace when a maximum * latency is reached, or when the user initiates a snapshot. * Some tracers will use this to store a maximum trace while * it continues examining live traces. * - * The buffers for the max_buffer are set up the same as the array_buffer - * When a snapshot is taken, the buffer of the max_buffer is swapped - * with the buffer of the array_buffer and the buffers are reset for - * the array_buffer so the tracing can continue. + * The buffers for the snapshot_buffer are set up the same as the + * array_buffer. When a snapshot is taken, the buffer of the + * snapshot_buffer is swapped with the buffer of the array_buffer + * and the buffers are reset for the array_buffer so the tracing can + * continue. */ - struct array_buffer max_buffer; + struct array_buffer snapshot_buffer; bool allocated_snapshot; spinlock_t snapshot_trigger_lock; unsigned int snapshot; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index d88c44f1dfa5..be53fe6fee6a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1225,7 +1225,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) /* check both trace buffers */ ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); trace->reset(tr); tracing_start(); @@ -1287,7 +1287,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) /* check both trace buffers */ ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); trace->reset(tr); tracing_start(); @@ -1355,7 +1355,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * if (ret) goto out; - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); if (ret) goto out; @@ -1385,7 +1385,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * if (ret) goto out; - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); if (!ret && !count) { printk(KERN_CONT ".. 
no entries found .."); @@ -1513,7 +1513,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* check both trace buffers */ ret = trace_test_buffer(&tr->array_buffer, NULL); if (!ret) - ret = trace_test_buffer(&tr->max_buffer, &count); + ret = trace_test_buffer(&tr->snapshot_buffer, &count); trace->reset(tr); -- cgit v1.2.3 From e4c1a09afbe2f02fc66b5ccbc96aa3a7109f9b79 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 8 Feb 2026 13:38:34 -0500 Subject: tracing: Add tracer_uses_snapshot() helper to remove #ifdefs Instead of having #ifdef CONFIG_TRACER_MAX_TRACE around every access to the struct tracer's use_max_tr field, add a helper function for that access and if CONFIG_TRACER_MAX_TRACE is not configured it just returns false. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208183856.599390238@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 51 ++++++++++++++++----------------------------------- kernel/trace/trace.h | 12 ++++++++++++ 2 files changed, 28 insertions(+), 35 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 98524d0656bf..405212166677 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -810,7 +810,6 @@ EXPORT_SYMBOL_GPL(tracing_on); static void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) { - struct tracer *tracer = tr->current_trace; unsigned long flags; if (in_nmi()) { @@ -827,7 +826,7 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, } /* Note, snapshot can not be used when the tracer uses it */ - if (tracer->use_max_tr) { + if (tracer_uses_snapshot(tr->current_trace)) { trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; @@ -1076,7 +1075,7 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, guard(mutex)(&trace_types_lock); - if (tr->current_trace->use_max_tr) + if (tracer_uses_snapshot(tr->current_trace)) return -EBUSY; /* @@ -1787,7 +1786,7 @@ static int run_tracer_selftest(struct tracer *type) tr->current_trace_flags = type->flags ? 
: type->default_flags; #ifdef CONFIG_TRACER_MAX_TRACE - if (type->use_max_tr) { + if (tracer_uses_snapshot(type)) { /* If we expanded the buffers, make sure the max is expanded too */ if (tr->ring_buffer_expanded) ring_buffer_resize(tr->snapshot_buffer.buffer, trace_buf_size, @@ -1812,7 +1811,7 @@ static int run_tracer_selftest(struct tracer *type) tracing_reset_online_cpus(&tr->array_buffer); #ifdef CONFIG_TRACER_MAX_TRACE - if (type->use_max_tr) { + if (tracer_uses_snapshot(type)) { tr->allocated_snapshot = false; /* Shrink the max buffer again */ @@ -3240,10 +3239,8 @@ static void *s_start(struct seq_file *m, loff_t *pos) } mutex_unlock(&trace_types_lock); -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->trace)) return ERR_PTR(-EBUSY); -#endif if (*pos != iter->pos) { iter->ent = NULL; @@ -3282,10 +3279,8 @@ static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->trace)) return; -#endif trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); @@ -4177,11 +4172,9 @@ static int tracing_open(struct inode *inode, struct file *file) static bool trace_ok_for_array(struct tracer *t, struct trace_array *tr) { -#ifdef CONFIG_TRACER_SNAPSHOT /* arrays with mapped buffer range do not have snapshots */ - if (tr->range_addr_start && t->use_max_tr) + if (tr->range_addr_start && tracer_uses_snapshot(t)) return false; -#endif return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances; } @@ -5550,9 +5543,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) { struct tracer *trace = NULL; struct tracers *t; -#ifdef CONFIG_TRACER_MAX_TRACE bool had_max_tr; -#endif int ret; guard(mutex)(&trace_types_lock); @@ -5580,7 +5571,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) return 0; #ifdef CONFIG_TRACER_SNAPSHOT - if (trace->use_max_tr) { + if (tracer_uses_snapshot(trace)) { local_irq_disable(); arch_spin_lock(&tr->max_lock); ret = tr->cond_snapshot ? -EBUSY : 0; @@ -5612,14 +5603,13 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); -#ifdef CONFIG_TRACER_MAX_TRACE - had_max_tr = tr->current_trace->use_max_tr; + had_max_tr = tracer_uses_snapshot(tr->current_trace); /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; tr->current_trace_flags = nop_trace.flags; - if (had_max_tr && !trace->use_max_tr) { + if (had_max_tr && !tracer_uses_snapshot(trace)) { /* * We need to make sure that the update_max_tr sees that * current_trace changed to nop_trace to keep it from @@ -5632,24 +5622,19 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf) tracing_disarm_snapshot(tr); } - if (!had_max_tr && trace->use_max_tr) { + if (!had_max_tr && tracer_uses_snapshot(trace)) { ret = tracing_arm_snapshot_locked(tr); if (ret) return ret; } -#else - tr->current_trace = &nop_trace; -#endif tr->current_trace_flags = t->flags ? 
: t->tracer->flags; if (trace->init) { ret = tracer_init(trace, tr); if (ret) { -#ifdef CONFIG_TRACER_MAX_TRACE - if (trace->use_max_tr) + if (tracer_uses_snapshot(trace)) tracing_disarm_snapshot(tr); -#endif tr->current_trace_flags = nop_trace.flags; return ret; } @@ -7207,7 +7192,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, guard(mutex)(&trace_types_lock); - if (tr->current_trace->use_max_tr) + if (tracer_uses_snapshot(tr->current_trace)) return -EBUSY; local_irq_disable(); @@ -7306,7 +7291,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp) info = filp->private_data; - if (info->iter.trace->use_max_tr) { + if (tracer_uses_snapshot(info->iter.trace)) { tracing_buffers_release(inode, filp); return -EBUSY; } @@ -7862,10 +7847,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (!count) return 0; -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->tr->current_trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) return -EBUSY; -#endif page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); @@ -8049,10 +8032,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, int entries, i; ssize_t ret = 0; -#ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->tr->current_trace->use_max_tr) + if (iter->snapshot && tracer_uses_snapshot(iter->tr->current_trace)) return -EBUSY; -#endif page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer); if (*ppos & (page_size - 1)) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b50383aa8e50..ebb47abc0ee7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -817,6 +817,18 @@ static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, } #endif /* CONFIG_STACKTRACE */ +#ifdef CONFIG_TRACER_MAX_TRACE +static inline bool tracer_uses_snapshot(struct tracer *tracer) +{ + return tracer->use_max_tr; +} +#else +static inline bool tracer_uses_snapshot(struct tracer *tracer) +{ + return false; +} +#endif + void trace_last_func_repeats(struct trace_array *tr, struct trace_func_repeats *last_info, unsigned int trace_ctx); -- cgit v1.2.3 From c4f1fe47b106e9200cbb1b8951bd75f036d53bd3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 8 Feb 2026 13:38:35 -0500 Subject: tracing: Better separate SNAPSHOT and MAX_TRACE options The latency tracers (scheduler, irqsoff, etc) were created when tracing was first added. These tracers required a "snapshot" buffer that was the same size as the ring buffer being written to. When a new max latency was hit, the main ring buffer would swap with the snapshot buffer so that the trace leading up to the latency would be saved in the snapshot buffer (The snapshot buffer is never written to directly and the data within it can be viewed without fear of being overwritten). Later, a new feature was added to allow snapshots to be taken by user space or even event triggers. This created a "snapshot" file that allowed users to trigger a snapshot from user space to save the current trace. The config for this new feature (CONFIG_TRACER_SNAPSHOT) would select the latency tracer config (CONFIG_TRACER_MAX_LATENCY) as it would need all the functionality from it as it already existed. But this was incorrect. As the snapshot feature is really what the latency tracers need and not the other way around. 
Have CONFIG_TRACER_MAX_TRACE select CONFIG_TRACER_SNAPSHOT where the tracers that needs the max latency buffer selects the TRACE_MAX_TRACE which will then select TRACER_SNAPSHOT. Also, go through trace.c and trace.h and make the code that only needs the TRACER_MAX_TRACE protected by that and the code that always requires the snapshot to be protected by TRACER_SNAPSHOT. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Link: https://patch.msgid.link/20260208183856.767870992@kernel.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/Kconfig | 8 +++--- kernel/trace/trace.c | 73 +++++++++++++++++++++++++++------------------------- kernel/trace/trace.h | 19 ++++++++------ 3 files changed, 53 insertions(+), 47 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index bfa2ec46e075..bedb2f982823 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -133,6 +133,7 @@ config BUILDTIME_MCOUNT_SORT config TRACER_MAX_TRACE bool + select TRACER_SNAPSHOT config TRACE_CLOCK bool @@ -422,7 +423,6 @@ config IRQSOFF_TRACER select GENERIC_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP - select TRACER_SNAPSHOT select TRACER_SNAPSHOT_PER_CPU_SWAP help This option measures the time spent in irqs-off critical @@ -445,7 +445,6 @@ config PREEMPT_TRACER select GENERIC_TRACER select TRACER_MAX_TRACE select RING_BUFFER_ALLOW_SWAP - select TRACER_SNAPSHOT select TRACER_SNAPSHOT_PER_CPU_SWAP select TRACE_PREEMPT_TOGGLE help @@ -467,7 +466,6 @@ config SCHED_TRACER select GENERIC_TRACER select CONTEXT_SWITCH_TRACER select TRACER_MAX_TRACE - select TRACER_SNAPSHOT help This tracer tracks the latency of the highest priority task to be scheduled in, starting from the point it has woken up. @@ -617,7 +615,6 @@ config TRACE_SYSCALL_BUF_SIZE_DEFAULT config TRACER_SNAPSHOT bool "Create a snapshot trace buffer" - select TRACER_MAX_TRACE help Allow tracing users to take snapshot of the current buffer using the ftrace interface, e.g.: @@ -625,6 +622,9 @@ config TRACER_SNAPSHOT echo 1 > /sys/kernel/tracing/snapshot cat snapshot + Note, the latency tracers select this option. To disable it, + all the latency tracers need to be disabled. 
+ config TRACER_SNAPSHOT_PER_CPU_SWAP bool "Allow snapshot to swap per CPU" depends on TRACER_SNAPSHOT diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 405212166677..845b8a165daf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -825,15 +825,15 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, return; } - /* Note, snapshot can not be used when the tracer uses it */ - if (tracer_uses_snapshot(tr->current_trace)) { - trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); + if (tr->mapped) { + trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } - if (tr->mapped) { - trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n"); + /* Note, snapshot can not be used when the tracer uses it */ + if (tracer_uses_snapshot(tr->current_trace)) { + trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } @@ -1555,8 +1555,8 @@ static void __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { struct array_buffer *trace_buf = &tr->array_buffer; - struct array_buffer *max_buf = &tr->snapshot_buffer; struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); + struct array_buffer *max_buf = &tr->snapshot_buffer; struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); max_buf->cpu = cpu; @@ -1585,7 +1585,14 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) tracing_record_cmdline(tsk); latency_fsnotify(tr); } +#else +static inline void trace_create_maxlat_file(struct trace_array *tr, + struct dentry *d_tracer) { } +static inline void __update_max_tr(struct trace_array *tr, + struct task_struct *tsk, int cpu) { } +#endif /* CONFIG_TRACER_MAX_TRACE */ +#ifdef CONFIG_TRACER_SNAPSHOT /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer @@ -1619,12 +1626,11 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, else ring_buffer_record_off(tr->snapshot_buffer.buffer); -#ifdef CONFIG_TRACER_SNAPSHOT if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) { arch_spin_unlock(&tr->max_lock); return; } -#endif + swap(tr->array_buffer.buffer, tr->snapshot_buffer.buffer); __update_max_tr(tr, tsk, cpu); @@ -1679,10 +1685,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); } -#else /* !CONFIG_TRACER_MAX_TRACE */ -static inline void trace_create_maxlat_file(struct trace_array *tr, - struct dentry *d_tracer) { } -#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ struct pipe_wait { struct trace_iterator *iter; @@ -1715,7 +1718,7 @@ static int wait_on_pipe(struct trace_iterator *iter, int full) ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full, wait_pipe_cond, &pwait); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* * Make sure this is still the snapshot buffer, as if a snapshot were * to happen, this would now be the main buffer. 
@@ -2058,7 +2061,7 @@ void tracing_reset_all_online_cpus_unlocked(void) continue; tr->clear_trace = false; tracing_reset_online_cpus(&tr->array_buffer); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT tracing_reset_online_cpus(&tr->snapshot_buffer); #endif } @@ -2098,7 +2101,7 @@ static void tracing_start_tr(struct trace_array *tr) if (buffer) ring_buffer_record_enable(buffer); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT buffer = tr->snapshot_buffer.buffer; if (buffer) ring_buffer_record_enable(buffer); @@ -2134,7 +2137,7 @@ static void tracing_stop_tr(struct trace_array *tr) if (buffer) ring_buffer_record_disable(buffer); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT buffer = tr->snapshot_buffer.buffer; if (buffer) ring_buffer_record_disable(buffer); @@ -3757,7 +3760,7 @@ static void test_ftrace_alive(struct seq_file *m) "# MAY BE MISSING FUNCTION EVENTS\n"); } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT static void show_snapshot_main_help(struct seq_file *m) { seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" @@ -3935,7 +3938,7 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot) iter->tr = tr; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* Currently only the top directory has a snapshot */ if (tr->current_trace->print_max || snapshot) iter->array_buffer = &tr->snapshot_buffer; @@ -4351,14 +4354,14 @@ int tracing_set_cpumask(struct trace_array *tr, if (cpumask_test_cpu(cpu, tr->tracing_cpumask) && !cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT ring_buffer_record_disable_cpu(tr->snapshot_buffer.buffer, cpu); #endif } if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) && cpumask_test_cpu(cpu, tracing_cpumask_new)) { ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT ring_buffer_record_enable_cpu(tr->snapshot_buffer.buffer, cpu); #endif } @@ -4568,7 +4571,7 @@ int set_tracer_flag(struct trace_array *tr, u64 mask, int enabled) case TRACE_ITER(OVERWRITE): ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT ring_buffer_change_overwrite(tr->snapshot_buffer.buffer, enabled); #endif break; @@ -5232,7 +5235,7 @@ static void update_buffer_entries(struct array_buffer *buf, int cpu) } } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* resize @tr's buffer to the size of @size_tr's entries */ static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, struct array_buffer *size_buf, int cpu_id) @@ -5258,7 +5261,7 @@ static int resize_buffer_duplicate_size(struct array_buffer *trace_buf, return ret; } -#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ static int __tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu) @@ -5283,7 +5286,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, if (ret < 0) goto out_start; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT if (!tr->allocated_snapshot) goto out; @@ -5315,7 +5318,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, update_buffer_entries(&tr->snapshot_buffer, cpu); out: -#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ update_buffer_entries(&tr->array_buffer, cpu); out_start: @@ -7020,7 +7023,7 @@ int 
tracing_set_clock(struct trace_array *tr, const char *clockstr) */ tracing_reset_online_cpus(&tr->array_buffer); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT if (tr->snapshot_buffer.buffer) ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func); tracing_reset_online_cpus(&tr->snapshot_buffer); @@ -8167,7 +8170,7 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned return 0; } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT static int get_snapshot_map(struct trace_array *tr) { int err = 0; @@ -9171,7 +9174,7 @@ buffer_subbuf_size_write(struct file *filp, const char __user *ubuf, if (ret) goto out; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT if (!tr->allocated_snapshot) goto out_max; @@ -9392,7 +9395,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) if (ret) return ret; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* Fix mapped buffer trace arrays do not have snapshot buffers */ if (tr->range_addr_start) return 0; @@ -9419,7 +9422,7 @@ static void free_trace_buffers(struct trace_array *tr) free_trace_buffer(&tr->array_buffer); kfree(tr->module_delta); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT free_trace_buffer(&tr->snapshot_buffer); #endif } @@ -9561,7 +9564,7 @@ trace_array_create_systems(const char *name, const char *systems, tr->syscall_buf_sz = global_trace.syscall_buf_sz; tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT spin_lock_init(&tr->snapshot_trigger_lock); #endif tr->current_trace = &nop_trace; @@ -10515,7 +10518,7 @@ ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, return done; } -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT __init static bool tr_needs_alloc_snapshot(const char *name) { char *test; @@ -10705,7 +10708,7 @@ __init static void enable_instances(void) } } else { /* Only non mapped buffers have snapshot buffers */ - if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) + if (IS_ENABLED(CONFIG_TRACER_SNAPSHOT)) do_allocate_snapshot(name); } @@ -10832,7 +10835,7 @@ __init static int tracer_alloc_buffers(void) global_trace.current_trace_flags = nop_trace.flags; global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT spin_lock_init(&global_trace.snapshot_trigger_lock); #endif ftrace_init_global_array_ops(&global_trace); @@ -10900,7 +10903,7 @@ struct trace_array *trace_get_global_array(void) void __init ftrace_boot_snapshot(void) { -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT struct trace_array *tr; if (!snapshot_at_boot) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ebb47abc0ee7..649fdd20fc91 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -329,7 +329,7 @@ struct trace_array { struct list_head list; char *name; struct array_buffer array_buffer; -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT /* * The snapshot_buffer is used to snapshot the trace when a maximum * latency is reached, or when the user initiates a snapshot. 
@@ -346,13 +346,16 @@ struct trace_array { bool allocated_snapshot; spinlock_t snapshot_trigger_lock; unsigned int snapshot; +#ifdef CONFIG_TRACER_MAX_TRACE unsigned long max_latency; #ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; struct work_struct fsnotify_work; struct irq_work fsnotify_irqwork; -#endif -#endif +#endif /* CONFIG_FSNOTIFY */ +#endif /* CONFIG_TRACER_MAX_TRACE */ +#endif /* CONFIG_TRACER_SNAPSHOT */ + /* The below is for memory mapped ring buffer */ unsigned int mapped; unsigned long range_addr_start; @@ -378,7 +381,7 @@ struct trace_array { * * It is also used in other places outside the update_max_tr * so it needs to be defined outside of the - * CONFIG_TRACER_MAX_TRACE. + * CONFIG_TRACER_SNAPSHOT. */ arch_spinlock_t max_lock; #ifdef CONFIG_FTRACE_SYSCALLS @@ -791,22 +794,22 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, struct trace_pid_list **new_pid_list, const char __user *ubuf, size_t cnt); -#ifdef CONFIG_TRACER_MAX_TRACE +#ifdef CONFIG_TRACER_SNAPSHOT void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, void *cond_data); void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); -#ifdef CONFIG_FSNOTIFY -#define LATENCY_FS_NOTIFY +#if defined(CONFIG_TRACER_MAX_TRACE) && defined(CONFIG_FSNOTIFY) +# define LATENCY_FS_NOTIFY #endif -#endif /* CONFIG_TRACER_MAX_TRACE */ #ifdef LATENCY_FS_NOTIFY void latency_fsnotify(struct trace_array *tr); #else static inline void latency_fsnotify(struct trace_array *tr) { } #endif +#endif /* CONFIG_TRACER_SNAPSHOT */ #ifdef CONFIG_STACKTRACE void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); -- cgit v1.2.3 From b4bade506b18eb2e5e34ac84f915d7ee6156d4e2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 9 Feb 2026 19:46:31 -0500 Subject: tracing: Move d_max_latency out of CONFIG_FSNOTIFY protection The tracing_max_latency shouldn't be limited if CONFIG_FSNOTIFY is defined or not and it was moved out of that protection to be always available with CONFIG_TRACER_MAX_TRACE. All was moved out except the dentry descriptor for it (d_max_latency) and it failed to build on some configs. Move that out of the CONFIG_FSNOTIFY protection too. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260209194631.788bfc85@fedora Fixes: ba73713da50e ("tracing: Clean up use of trace_create_maxlat_file()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202602092133.fTdojd95-lkp@intel.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 649fdd20fc91..7894bf55743c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -348,8 +348,8 @@ struct trace_array { unsigned int snapshot; #ifdef CONFIG_TRACER_MAX_TRACE unsigned long max_latency; -#ifdef CONFIG_FSNOTIFY struct dentry *d_max_latency; +#ifdef CONFIG_FSNOTIFY struct work_struct fsnotify_work; struct irq_work fsnotify_irqwork; #endif /* CONFIG_FSNOTIFY */ -- cgit v1.2.3 From f743435f988cb0cf1f521035aee857851b25e06d Mon Sep 17 00:00:00 2001 From: Colin Lord Date: Mon, 9 Feb 2026 23:48:10 -0800 Subject: tracing: Fix false sharing in hwlat get_sample() The get_sample() function in the hwlat tracer assumes the caller holds hwlat_data.lock, but this is not actually happening. 
The result is unprotected data access to hwlat_data, and in per-cpu mode can result in false sharing which may show up as false positive latency events. The specific case of false sharing observed was primarily between hwlat_data.sample_width and hwlat_data.count. These are separated by just 8B and are therefore likely to share a cache line. When one thread modifies count, the cache line is in a modified state so when other threads read sample_width in the main latency detection loop, they fetch the modified cache line. On some systems, the fetch itself may be slow enough to count as a latency event, which could set up a self reinforcing cycle of latency events as each event increments count which then causes more latency events, continuing the cycle. The other result of the unprotected data access is that hwlat_data.count can end up with duplicate or missed values, which was observed on some systems in testing. Convert hwlat_data.count to atomic64_t so it can be safely modified without locking, and prevent false sharing by pulling sample_width into a local variable. One system this was tested on was a dual socket server with 32 CPUs on each numa node. With settings of 1us threshold, 1000us width, and 2000us window, this change reduced the number of latency events from 500 per second down to approximately 1 event per minute. Some machines tested did not exhibit measurable latency from the false sharing. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://patch.msgid.link/20260210074810.6328-1-clord@mykolab.com Signed-off-by: Colin Lord Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_hwlat.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 2f7b94e98317..3fe274b84f1c 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -102,9 +102,9 @@ struct hwlat_sample { /* keep the global state somewhere. */ static struct hwlat_data { - struct mutex lock; /* protect changes */ + struct mutex lock; /* protect changes */ - u64 count; /* total since reset */ + atomic64_t count; /* total since reset */ u64 sample_window; /* total sampling window (on+off) */ u64 sample_width; /* active sampling portion of window */ @@ -193,8 +193,7 @@ void trace_hwlat_callback(bool enter) * get_sample - sample the CPU TSC and look for likely hardware latencies * * Used to repeatedly capture the CPU TSC (or similar), looking for potential - * hardware-induced latency. Called with interrupts disabled and with - * hwlat_data.lock held. + * hardware-induced latency. Called with interrupts disabled. 
*/ static int get_sample(void) { @@ -204,6 +203,7 @@ static int get_sample(void) time_type start, t1, t2, last_t2; s64 diff, outer_diff, total, last_total = 0; u64 sample = 0; + u64 sample_width = READ_ONCE(hwlat_data.sample_width); u64 thresh = tracing_thresh; u64 outer_sample = 0; int ret = -1; @@ -267,7 +267,7 @@ static int get_sample(void) if (diff > sample) sample = diff; /* only want highest value */ - } while (total <= hwlat_data.sample_width); + } while (total <= sample_width); barrier(); /* finish the above in the view for NMIs */ trace_hwlat_callback_enabled = false; @@ -285,8 +285,7 @@ static int get_sample(void) if (kdata->nmi_total_ts) do_div(kdata->nmi_total_ts, NSEC_PER_USEC); - hwlat_data.count++; - s.seqnum = hwlat_data.count; + s.seqnum = atomic64_inc_return(&hwlat_data.count); s.duration = sample; s.outer_duration = outer_sample; s.nmi_total_ts = kdata->nmi_total_ts; @@ -832,7 +831,7 @@ static int hwlat_tracer_init(struct trace_array *tr) hwlat_trace = tr; - hwlat_data.count = 0; + atomic64_set(&hwlat_data.count, 0); tr->max_latency = 0; save_tracing_thresh = tracing_thresh; -- cgit v1.2.3 From f844282deed7481cf2f813933229261e27306551 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 10 Feb 2026 17:43:36 +0900 Subject: tracing: Fix to set write permission to per-cpu buffer_size_kb Since the per-cpu buffer_size_kb file is writable for changing per-cpu ring buffer size, the file should have the write access permission. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Link: https://patch.msgid.link/177071301597.2293046.11683339475076917920.stgit@mhiramat.tok.corp.google.com Fixes: 21ccc9cd7211 ("tracing: Disable "other" permission bits in the tracefs files") Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 845b8a165daf..fd470675809b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8613,7 +8613,7 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_stats_fops); - trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, + trace_create_cpu_file("buffer_size_kb", TRACE_MODE_WRITE, d_cpu, tr, cpu, &tracing_entries_fops); if (tr->range_addr_start) -- cgit v1.2.3 From 804c4a2209bcf6ed4c45386f033e4d0f7c5bfda5 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 10 Feb 2026 17:43:43 +0900 Subject: tracing: Reset last_boot_info if ring buffer is reset Commit 32dc0042528d ("tracing: Reset last-boot buffers when reading out all cpu buffers") resets the last_boot_info when user read out all data via trace_pipe* files. But it is not reset when user resets the buffer from other files. (e.g. write `trace` file) Reset it when the corresponding ring buffer is reset too. 
From 804c4a2209bcf6ed4c45386f033e4d0f7c5bfda5 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)"
Date: Tue, 10 Feb 2026 17:43:43 +0900
Subject: tracing: Reset last_boot_info if ring buffer is reset

Commit 32dc0042528d ("tracing: Reset last-boot buffers when reading out all
cpu buffers") resets the last_boot_info when the user reads out all data via
the trace_pipe* files. But it is not reset when the user resets the buffer
through other files (e.g. by writing to the `trace` file).

Reset it when the corresponding ring buffer is reset too.

Cc: stable@vger.kernel.org
Cc: Mathieu Desnoyers
Link: https://patch.msgid.link/177071302364.2293046.17895165659153977720.stgit@mhiramat.tok.corp.google.com
Fixes: 32dc0042528d ("tracing: Reset last-boot buffers when reading out all cpu buffers")
Signed-off-by: Masami Hiramatsu (Google)
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fd470675809b..e884d32b7895 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4127,6 +4127,8 @@ static int tracing_single_release_tr(struct inode *inode, struct file *file)
 	return single_release(inode, file);
 }
 
+static bool update_last_data_if_empty(struct trace_array *tr);
+
 static int tracing_open(struct inode *inode, struct file *file)
 {
 	struct trace_array *tr = inode->i_private;
@@ -4151,6 +4153,8 @@ static int tracing_open(struct inode *inode, struct file *file)
 			tracing_reset_online_cpus(trace_buf);
 		else
 			tracing_reset_cpu(trace_buf, cpu);
+
+		update_last_data_if_empty(tr);
 	}
 
 	if (file->f_mode & FMODE_READ) {
@@ -5215,6 +5219,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
 int tracer_init(struct tracer *t, struct trace_array *tr)
 {
 	tracing_reset_online_cpus(&tr->array_buffer);
+	update_last_data_if_empty(tr);
 	return t->init(tr);
 }
 
@@ -7028,6 +7033,7 @@ int tracing_set_clock(struct trace_array *tr, const char *clockstr)
 	ring_buffer_set_clock(tr->snapshot_buffer.buffer, trace_clocks[i].func);
 	tracing_reset_online_cpus(&tr->snapshot_buffer);
 #endif
+	update_last_data_if_empty(tr);
 
 	if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
 		struct trace_scratch *tscratch = tr->scratch;
-- cgit v1.2.3
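For illustration, the behaviour this patch makes consistent can be seen from
user space: clearing the buffer by opening `trace` for writing with
truncation should now refresh last_boot_info, just as draining the
trace_pipe* files does. This is only a sketch, not a test from the patch; it
assumes the standard tracefs mount point and a kernel/instance that exposes
the last_boot_info file.

/* Sketch: reset the buffer via the `trace` file, then read last_boot_info. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[256];
	ssize_t n;
	int fd;

	/* O_TRUNC on `trace` takes the tracing_open() reset path shown above. */
	fd = open("/sys/kernel/tracing/trace", O_WRONLY | O_TRUNC);
	if (fd >= 0)
		close(fd);

	fd = open("/sys/kernel/tracing/last_boot_info", O_RDONLY);
	if (fd < 0) {
		perror("last_boot_info");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);	/* should now describe the current boot */
	}
	close(fd);
	return 0;
}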
From fa4820b893843f7ad5e1b5c446a92426c5c946ce Mon Sep 17 00:00:00 2001
From: Haoyang LIU
Date: Tue, 10 Feb 2026 23:39:02 +0800
Subject: tracing: Fix indentation of return statement in print_trace_fmt()

The return statement inside the nested if block in print_trace_fmt() is not
properly indented, making the code structure unclear. This was flagged by
smatch as a warning.

Add proper indentation to the return statement to match the kernel coding
style and improve readability.

Cc: Mathieu Desnoyers
Link: https://patch.msgid.link/20260210153903.8041-1-tttturtleruss@gmail.com
Signed-off-by: Haoyang LIU
Acked-by: Masami Hiramatsu (Google)
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e884d32b7895..2f6fbf9e7caf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3537,7 +3537,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
 		/* ftrace and system call events are still OK */
 		if ((event->type > __TRACE_LAST_TYPE) &&
 		    !is_syscall_event(event))
-		return print_event_fields(iter, event);
+			return print_event_fields(iter, event);
 	}
 	return event->funcs->trace(iter, sym_flags, event);
 }
-- cgit v1.2.3

From 53b2fae90ff01fede6520ca744ed5e8e366497ba Mon Sep 17 00:00:00 2001
From: Shengming Hu
Date: Fri, 13 Feb 2026 14:29:32 +0800
Subject: function_graph: Restore direct mode when callbacks drop to one

When registering a second fgraph callback, the direct path is disabled and
the array loop is used instead. When ftrace_graph_active drops back to one,
we try to re-enable direct mode via ftrace_graph_enable_direct(true, ...),
but ftrace_graph_enable_direct() incorrectly disables the static key rather
than enabling it.

This leaves fgraph_do_direct permanently off after the first multi-callback
transition, so direct fast mode is never restored.

Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20260213142932519cuWSpEXeS4-UnCvNXnK2P@zte.com.cn
Fixes: cc60ee813b503 ("function_graph: Use static_call and branch to optimize entry function")
Signed-off-by: Shengming Hu
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/fgraph.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index cc48d16be43e..4df766c690f9 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1303,7 +1303,7 @@ static void ftrace_graph_enable_direct(bool enable_branch, struct fgraph_ops *go
 	static_call_update(fgraph_func, func);
 	static_call_update(fgraph_retfunc, retfunc);
 	if (enable_branch)
-		static_branch_disable(&fgraph_do_direct);
+		static_branch_enable(&fgraph_do_direct);
 }
 
 static void ftrace_graph_disable_direct(bool disable_branch)
-- cgit v1.2.3
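A condensed, kernel-style sketch of the static-key pattern involved (module
context, with illustrative names, not the fgraph code itself): the key
selects the single-callback fast path, so when the caller count drops back
to one the key must be enabled again; disabling it at that point, as the
pre-fix code did, leaves the slow array loop in place permanently.

/* Kernel-style sketch; names are illustrative, not fgraph's. */
#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(do_direct);

static void handle_direct(void) { /* call the single registered callback */ }
static void handle_loop(void)   { /* walk the whole callback array */ }

static void handle_event(void)
{
	if (static_branch_likely(&do_direct))
		handle_direct();	/* fast path, live while the key is enabled */
	else
		handle_loop();
}

static void set_direct_mode(bool enable)
{
	if (enable)
		static_branch_enable(&do_direct);	/* restore the fast path */
	else
		static_branch_disable(&do_direct);	/* multiple callbacks: use the loop */
}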