From 2cddfc2e8fc78c13b0f5286ea5dd48cdf527ad41 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin
Date: Fri, 26 Dec 2025 11:07:24 -0500
Subject: tracing: Add bitmask-list option for human-readable bitmask display

Add support for displaying bitmasks in human-readable list format
(e.g., 0,2-5,7) in addition to the default hexadecimal bitmap
representation. This is particularly useful when tracing CPU masks and
other large bitmasks where individual bit positions are more meaningful
than their hexadecimal encoding.

When the "bitmask-list" option is enabled, the printk "%*pbl" format
specifier is used to render bitmasks as comma-separated ranges, making
trace output easier to interpret for complex CPU configurations and
large bitmask values.

Link: https://patch.msgid.link/20251226160724.2246493-2-atomlin@atomlin.com
Signed-off-by: Aaron Tomlin
Signed-off-by: Steven Rostedt (Google)
---
 include/trace/stages/stage3_trace_output.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/trace')

diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index 1e7b0bef95f5..fce85ea2df1c 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -39,7 +39,7 @@
 		void *__bitmask = __get_dynamic_array(field);		\
 		unsigned int __bitmask_size;				\
 		__bitmask_size = __get_dynamic_array_len(field);	\
-		trace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
+		trace_print_bitmask_seq(iter, __bitmask, __bitmask_size); \
 	})
 
 #undef __get_cpumask
@@ -51,7 +51,7 @@
 		void *__bitmask = __get_rel_dynamic_array(field);	\
 		unsigned int __bitmask_size;				\
 		__bitmask_size = __get_rel_dynamic_array_len(field);	\
-		trace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
+		trace_print_bitmask_seq(iter, __bitmask, __bitmask_size); \
 	})
 
 #undef __get_rel_cpumask
--
cgit v1.2.3
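For reference, below is a minimal sketch (not part of the patch above)
contrasting the default "%*pb" hexadecimal output with the "%*pbl" list
output that the bitmask-list option selects. The demo module name and the
mask value are illustrative only.

/* Illustrative demo module -- not from the patch above. */
#include <linux/module.h>
#include <linux/bitmap.h>
#include <linux/printk.h>

static int __init pbl_demo_init(void)
{
	/* Bits 0, 2-5 and 7 set: 0xbd */
	DECLARE_BITMAP(mask, 8) = { 0xbd };

	pr_info("bitmap:      %*pb\n", 8, mask);  /* prints "bd"      */
	pr_info("bitmap-list: %*pbl\n", 8, mask); /* prints "0,2-5,7" */
	return 0;
}

static void __exit pbl_demo_exit(void)
{
}

module_init(pbl_demo_init);
module_exit(pbl_demo_exit);
MODULE_DESCRIPTION("Contrast %*pb and %*pbl bitmask formatting");
MODULE_LICENSE("GPL");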
From 16718274ee75d24a21b10ebc8998c48cc14a656a Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 26 Jan 2026 18:11:46 -0500
Subject: tracing: perf: Have perf tracepoint callbacks always disable preemption

In preparation for converting tracepoints from being protected by a
preempt-disabled section to being protected by SRCU, have all the perf
callbacks disable preemption themselves, as perf expects preemption to
be disabled when processing tracepoints.

While at it, convert the perf system call callback's preempt_disable()
to a guard(preempt).

Link: https://lore.kernel.org/all/20250613152218.1924093-1-bigeasy@linutronix.de/
Link: https://patch.msgid.link/20260108220550.2f6638f3@fedora
Cc: Masami Hiramatsu
Cc: Mark Rutland
Cc: Mathieu Desnoyers
Cc: Andrew Morton
Cc: "Paul E. McKenney"
Cc: Sebastian Andrzej Siewior
Cc: Alexei Starovoitov
Link: https://patch.msgid.link/20260126231256.174621257@kernel.org
Signed-off-by: Steven Rostedt (Google)
---
 include/trace/perf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/trace')

diff --git a/include/trace/perf.h b/include/trace/perf.h
index a1754b73a8f5..348ad1d9b556 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -71,6 +71,7 @@ perf_trace_##call(void *__data, proto)				\
 	u64 __count __attribute__((unused));				\
 	struct task_struct *__task __attribute__((unused));		\
 									\
+	guard(preempt_notrace)();					\
 	do_perf_trace_##call(__data, args);				\
 }
 
@@ -85,9 +86,8 @@ perf_trace_##call(void *__data, proto)				\
 	struct task_struct *__task __attribute__((unused));		\
 									\
 	might_fault();							\
-	preempt_disable_notrace();					\
+	guard(preempt_notrace)();					\
 	do_perf_trace_##call(__data, args);				\
-	preempt_enable_notrace();					\
 }
 
 /*
--
cgit v1.2.3
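guard(preempt_notrace)() is a scope-based guard (linux/cleanup.h): it
disables preemption where it is declared and re-enables it automatically
on every exit path of the enclosing scope, which is why the explicit
preempt_enable_notrace() call can be dropped. A rough sketch of the
equivalence follows; do_something() is a stand-in, not a real kernel
function.

/* Illustrative sketch -- not from the patch; do_something() is hypothetical. */

static notrace void example_open_coded(void *data)
{
	preempt_disable_notrace();
	do_something(data);
	preempt_enable_notrace();	/* must be repeated on every return path */
}

static notrace void example_guarded(void *data)
{
	guard(preempt_notrace)();	/* preemption re-enabled automatically at '}' */
	do_something(data);
}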
From a46023d5616ed3ed781e56ca93400eb9490e3646 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Mon, 26 Jan 2026 18:11:48 -0500
Subject: tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast

The current use of guard(preempt_notrace)() within __DECLARE_TRACE() to
protect invocation of __DO_TRACE_CALL() means that BPF programs attached
to tracepoints are non-preemptible. This is unhelpful in real-time
systems, whose users apparently wish to use BPF while also achieving low
latencies. (Who knew?)

One option would be to use preemptible RCU, but this introduces many
opportunities for infinite recursion, which many consider to be
counterproductive, especially given the relatively small stacks provided
by the Linux kernel. These opportunities could be shut down by
sufficiently energetic duplication of code, but this sort of thing is
considered impolite in some circles.

Therefore, use the shiny new SRCU-fast API, which provides somewhat
faster readers than those of preemptible RCU, at least on Paul E.
McKenney's laptop, where task_struct access is more expensive than
access to per-CPU variables. And SRCU-fast provides way faster readers
than does SRCU, courtesy of being able to avoid the read-side use of
smp_mb(). Also, it is quite straightforward to create
srcu_read_{,un}lock_fast_notrace() functions.

Link: https://lore.kernel.org/all/20250613152218.1924093-1-bigeasy@linutronix.de/
Cc: Masami Hiramatsu
Cc: Mark Rutland
Cc: Mathieu Desnoyers
Cc: Andrew Morton
Cc: Sebastian Andrzej Siewior
Cc: Alexei Starovoitov
Link: https://patch.msgid.link/20260126231256.499701982@kernel.org
Co-developed-by: Paul E. McKenney
Signed-off-by: Paul E. McKenney
Signed-off-by: Steven Rostedt (Google)
---
 include/linux/tracepoint.h   |  9 +++++----
 include/trace/trace_events.h |  4 ++--
 kernel/tracepoint.c          | 18 ++++++++++++++----
 3 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/trace')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 8a56f3278b1b..22ca1c8b54f3 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -108,14 +108,15 @@ void for_each_tracepoint_in_module(struct module *mod,
  * An alternative is to use the following for batch reclaim associated
  * with a given tracepoint:
  *
- * - tracepoint_is_faultable() == false: call_rcu()
+ * - tracepoint_is_faultable() == false: call_srcu()
  * - tracepoint_is_faultable() == true: call_rcu_tasks_trace()
  */
 #ifdef CONFIG_TRACEPOINTS
+extern struct srcu_struct tracepoint_srcu;
 static inline void tracepoint_synchronize_unregister(void)
 {
 	synchronize_rcu_tasks_trace();
-	synchronize_rcu();
+	synchronize_srcu(&tracepoint_srcu);
 }
 static inline bool tracepoint_is_faultable(struct tracepoint *tp)
 {
@@ -275,13 +276,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		return static_branch_unlikely(&__tracepoint_##name.key);\
 	}
 
-#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \
+#define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
 	__DECLARE_TRACE_COMMON(name, PARAMS(proto), PARAMS(args), PARAMS(data_proto)) \
 	static inline void __do_trace_##name(proto)			\
 	{								\
 		TRACEPOINT_CHECK(name)					\
 		if (cond) {						\
-			guard(preempt_notrace)();			\
+			guard(srcu_fast_notrace)(&tracepoint_srcu);	\
 			__DO_TRACE_CALL(name, TP_ARGS(args));		\
 		}							\
 	}								\
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 4f22136fd465..fbc07d353be6 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -436,6 +436,7 @@ __DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
 static notrace void							\
 trace_event_raw_event_##call(void *__data, proto)			\
 {									\
+	guard(preempt_notrace)();					\
 	do_trace_event_raw_event_##call(__data, args);			\
 }
 
@@ -447,9 +448,8 @@ static notrace void							\
 trace_event_raw_event_##call(void *__data, proto)			\
 {									\
 	might_fault();							\
-	preempt_disable_notrace();					\
+	guard(preempt_notrace)();					\
 	do_trace_event_raw_event_##call(__data, args);			\
-	preempt_enable_notrace();					\
 }
 
 /*
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 62719d2941c9..fd2ee879815c 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -34,9 +34,13 @@ enum tp_transition_sync {
 
 struct tp_transition_snapshot {
 	unsigned long rcu;
+	unsigned long srcu_gp;
 	bool ongoing;
 };
 
+DEFINE_SRCU_FAST(tracepoint_srcu);
+EXPORT_SYMBOL_GPL(tracepoint_srcu);
+
 /* Protected by tracepoints_mutex */
 static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC];
 
@@ -46,6 +50,7 @@ static void tp_rcu_get_state(enum tp_transition_sync sync)
 
 	/* Keep the latest get_state snapshot. */
 	snapshot->rcu = get_state_synchronize_rcu();
+	snapshot->srcu_gp = start_poll_synchronize_srcu(&tracepoint_srcu);
 	snapshot->ongoing = true;
 }
 
@@ -56,6 +61,8 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync)
 	if (!snapshot->ongoing)
 		return;
 	cond_synchronize_rcu(snapshot->rcu);
+	if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu_gp))
+		synchronize_srcu(&tracepoint_srcu);
 	snapshot->ongoing = false;
 }
 
@@ -112,10 +119,13 @@ static inline void release_probes(struct tracepoint *tp, struct tracepoint_func
 		struct tp_probes *tp_probes = container_of(old,
 			struct tp_probes, probes[0]);
 
-		if (tracepoint_is_faultable(tp))
-			call_rcu_tasks_trace(&tp_probes->rcu, rcu_free_old_probes);
-		else
-			call_rcu(&tp_probes->rcu, rcu_free_old_probes);
+		if (tracepoint_is_faultable(tp)) {
+			call_rcu_tasks_trace(&tp_probes->rcu,
+					     rcu_free_old_probes);
+		} else {
+			call_srcu(&tracepoint_srcu, &tp_probes->rcu,
+				  rcu_free_old_probes);
+		}
 	}
 }
--
cgit v1.2.3
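The tp_rcu_get_state()/tp_rcu_cond_sync() changes above reuse SRCU's
grace-period polling API so that later synchronization only blocks when a
full SRCU grace period has not already elapsed since the cookie was taken.
A minimal sketch of that pattern follows; the names my_srcu, my_cookie,
my_get_state() and my_cond_sync() are hypothetical.

/* Illustrative sketch -- not from the patch; names are hypothetical. */
DEFINE_SRCU_FAST(my_srcu);

static unsigned long my_cookie;

static void my_get_state(void)
{
	/* Snapshot the current SRCU grace-period state without blocking. */
	my_cookie = start_poll_synchronize_srcu(&my_srcu);
}

static void my_cond_sync(void)
{
	/* Block only if that grace period has not already completed. */
	if (!poll_state_synchronize_srcu(&my_srcu, my_cookie))
		synchronize_srcu(&my_srcu);
}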