From f915fcb38553eb9150a918348d932fd292de71dc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:37 +0200 Subject: bpf: Count stats for kprobe_multi programs Adding support to gather missed stats for kprobe_multi programs due to bpf_prog_active protection. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Song Liu Reviewed-by: Song Liu Link: https://lore.kernel.org/bpf/20230920213145.1941596-2-jolsa@kernel.org --- kernel/trace/bpf_trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 868008f56fec..54827d04c9a6 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2710,6 +2710,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, int err; if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + bpf_prog_inc_misses_counter(link->link.prog); err = 0; goto out; } -- cgit v1.2.3 From e2b2cd592adbd303bcc02451d32fedd511000fb0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:38 +0200 Subject: bpf: Add missed value to kprobe_multi link info Add missed value to kprobe_multi link info to hold the stats of missed kprobe_multi probe. The missed counter gets incremented when fprobe fails the recursion check or there's no rethook available for return probe. In either case the attached bpf program is not executed. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Song Liu Reviewed-by: Song Liu Acked-by: Hou Tao Link: https://lore.kernel.org/bpf/20230920213145.1941596-3-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + kernel/trace/bpf_trace.c | 1 + tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 3 insertions(+) (limited to 'kernel/trace') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5f13db15a3c7..1da5b1bcce71 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6532,6 +6532,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 54827d04c9a6..6aec6e7d612a 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2614,6 +2614,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); info->kprobe_multi.count = kmulti_link->cnt; info->kprobe_multi.flags = kmulti_link->flags; + info->kprobe_multi.missed = kmulti_link->fp.nmissed; if (!uaddrs) return 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5f13db15a3c7..1da5b1bcce71 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6532,6 +6532,7 @@ struct bpf_link_info { __aligned_u64 addrs; __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; + __u64 missed; } kprobe_multi; struct { __u32 type; /* enum bpf_perf_event_type */ -- cgit v1.2.3 From 3acf8ace68230e9558cf916847f1cc9f208abdf1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:39 +0200 Subject: bpf: Add missed value to kprobe perf link info Add missed value to kprobe attached through perf link info to hold the stats of missed kprobe handler execution. The kprobe's missed counter gets incremented when kprobe handler is not executed due to another kprobe running on the same cpu. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230920213145.1941596-4-jolsa@kernel.org --- include/linux/trace_events.h | 6 ++++-- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 14 ++++++++------ kernel/trace/bpf_trace.c | 5 +++-- kernel/trace/trace_kprobe.c | 14 +++++++++++--- tools/include/uapi/linux/bpf.h | 1 + 6 files changed, 28 insertions(+), 13 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 21ae37e49319..5eb88a66eb68 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -761,7 +761,8 @@ struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr); + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed); int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -801,7 +802,7 @@ static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) static inline int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, - u64 *probe_addr) + u64 *probe_addr, unsigned long *missed) { return -EOPNOTSUPP; } @@ -877,6 +878,7 @@ extern void perf_kprobe_destroy(struct perf_event *event); extern int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, u64 *probe_addr, + unsigned long *missed, bool perf_type_tracepoint); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1da5b1bcce71..70bfa997e896 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6548,6 +6548,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 85c1d908f70f..6b5280f14a53 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3374,7 +3374,7 @@ static void bpf_perf_link_dealloc(struct bpf_link *link) static int bpf_perf_link_fill_common(const struct perf_event *event, char __user *uname, u32 ulen, u64 *probe_offset, u64 *probe_addr, - u32 *fd_type) + u32 *fd_type, unsigned long *missed) { const char *buf; u32 prog_id; @@ -3385,7 +3385,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, return -EINVAL; err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf, - probe_offset, probe_addr); + probe_offset, probe_addr, missed); if (err) return err; if (!uname) @@ -3408,6 +3408,7 @@ static int bpf_perf_link_fill_common(const struct perf_event *event, static int bpf_perf_link_fill_kprobe(const struct perf_event *event, struct bpf_link_info *info) { + unsigned long missed; char __user *uname; u64 addr, offset; u32 ulen, type; @@ -3416,7 +3417,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.kprobe.func_name); ulen = info->perf_event.kprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, &missed); if (err) return err; if (type == BPF_FD_TYPE_KRETPROBE) @@ -3425,6 +3426,7 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, info->perf_event.type = BPF_PERF_EVENT_KPROBE; info->perf_event.kprobe.offset = offset; + info->perf_event.kprobe.missed = missed; if (!kallsyms_show_value(current_cred())) addr = 0; info->perf_event.kprobe.addr = addr; @@ -3444,7 +3446,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; err = bpf_perf_link_fill_common(event, uname, ulen, &offset, &addr, - &type); + &type, NULL); if (err) return err; @@ -3480,7 +3482,7 @@ static int bpf_perf_link_fill_tracepoint(const struct perf_event *event, uname = u64_to_user_ptr(info->perf_event.tracepoint.tp_name); ulen = info->perf_event.tracepoint.name_len; info->perf_event.type = BPF_PERF_EVENT_TRACEPOINT; - return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL); + return bpf_perf_link_fill_common(event, uname, ulen, NULL, NULL, NULL, NULL); } static int bpf_perf_link_fill_perf_event(const struct perf_event *event, @@ -4813,7 +4815,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, err = bpf_get_perf_event_info(event, &prog_id, &fd_type, &buf, &probe_offset, - &probe_addr); + &probe_addr, NULL); if (!err) err = bpf_task_fd_query_copy(attr, uattr, prog_id, fd_type, buf, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6aec6e7d612a..f6a7d2524949 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2384,7 +2384,8 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, - u64 *probe_offset, u64 *probe_addr) + u64 *probe_offset, u64 *probe_addr, + unsigned long *missed) { bool is_tracepoint, is_syscall_tp; struct bpf_prog *prog; @@ -2419,7 +2420,7 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, #ifdef CONFIG_KPROBE_EVENTS if (flags & TRACE_EVENT_FL_KPROBE) err = bpf_get_kprobe_info(event, fd_type, buf, - probe_offset, probe_addr, + probe_offset, probe_addr, missed, event->attr.type == PERF_TYPE_TRACEPOINT); #endif #ifdef CONFIG_UPROBE_EVENTS diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3d7a180a8427..961a78ffd6d2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1189,6 +1189,12 @@ static const struct file_operations kprobe_events_ops = { .write = probes_write, }; +static unsigned long trace_kprobe_missed(struct trace_kprobe *tk) +{ + return trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; +} + /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { @@ -1200,8 +1206,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) return 0; tk = to_trace_kprobe(ev); - nmissed = trace_kprobe_is_return(tk) ? - tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; + nmissed = trace_kprobe_missed(tk); seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), @@ -1547,7 +1552,8 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, const char **symbol, u64 *probe_offset, - u64 *probe_addr, bool perf_type_tracepoint) + u64 *probe_addr, unsigned long *missed, + bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; @@ -1566,6 +1572,8 @@ int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type, *probe_addr = kallsyms_show_value(current_cred()) ? (unsigned long)tk->rp.kp.addr : 0; *symbol = tk->symbol; + if (missed) + *missed = trace_kprobe_missed(tk); return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1da5b1bcce71..70bfa997e896 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6548,6 +6548,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from func_name */ __u64 addr; + __u64 missed; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ -- cgit v1.2.3 From dd8657894c11b03c6eb0fd53fe9d7fec2072d18b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Sep 2023 23:31:40 +0200 Subject: bpf: Count missed stats in trace_call_bpf Increase misses stats in case bpf array execution is skipped because of recursion check in trace_call_bpf. Adding bpf_prog_inc_misses_counters that increase misses counts for all bpf programs in bpf_prog_array. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Tested-by: Song Liu Reviewed-by: Song Liu Link: https://lore.kernel.org/bpf/20230920213145.1941596-5-jolsa@kernel.org --- include/linux/bpf.h | 16 ++++++++++++++++ kernel/trace/bpf_trace.c | 3 +++ 2 files changed, 19 insertions(+) (limited to 'kernel/trace') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 30063a760b5a..a82efd34b741 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2922,6 +2922,22 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ +static __always_inline void +bpf_prog_inc_misses_counters(const struct bpf_prog_array *array) +{ + const struct bpf_prog_array_item *item; + struct bpf_prog *prog; + + if (unlikely(!array)) + return; + + item = &array->items[0]; + while ((prog = READ_ONCE(item->prog))) { + bpf_prog_inc_misses_counter(prog); + item++; + } +} + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) void bpf_sk_reuseport_detach(struct sock *sk); int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f6a7d2524949..df697c74d519 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -117,6 +117,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) * and don't send kprobe event into ring-buffer, * so return zero here */ + rcu_read_lock(); + bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array)); + rcu_read_unlock(); ret = 0; goto out; } -- cgit v1.2.3 From ba8ea72388a192c10f1ee5f5a4a32332e7cced76 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Fri, 13 Oct 2023 07:42:19 +0200 Subject: bpf: Change syscall_nr type to int in struct syscall_tp_t linux-rt-devel tree contains a patch (b1773eac3f29c ("sched: Add support for lazy preemption")) that adds an extra member to struct trace_entry. This causes the offset of args field in struct trace_event_raw_sys_enter be different from the one in struct syscall_trace_enter: struct trace_event_raw_sys_enter { struct trace_entry ent; /* 0 12 */ /* XXX last struct has 3 bytes of padding */ /* XXX 4 bytes hole, try to pack */ long int id; /* 16 8 */ long unsigned int args[6]; /* 24 48 */ /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */ char __data[]; /* 72 0 */ /* size: 72, cachelines: 2, members: 4 */ /* sum members: 68, holes: 1, sum holes: 4 */ /* paddings: 1, sum paddings: 3 */ /* last cacheline: 8 bytes */ }; struct syscall_trace_enter { struct trace_entry ent; /* 0 12 */ /* XXX last struct has 3 bytes of padding */ int nr; /* 12 4 */ long unsigned int args[]; /* 16 0 */ /* size: 16, cachelines: 1, members: 3 */ /* paddings: 1, sum paddings: 3 */ /* last cacheline: 16 bytes */ }; This, in turn, causes perf_event_set_bpf_prog() fail while running bpf test_profiler testcase because max_ctx_offset is calculated based on the former struct, while off on the latter: 10488 if (is_tracepoint || is_syscall_tp) { 10489 int off = trace_event_get_offsets(event->tp_event); 10490 10491 if (prog->aux->max_ctx_offset > off) 10492 return -EACCES; 10493 } What bpf program is actually getting is a pointer to struct syscall_tp_t, defined in kernel/trace/trace_syscalls.c. This patch fixes the problem by aligning struct syscall_tp_t with struct syscall_trace_(enter|exit) and changing the tests to use these structs to dereference context. Signed-off-by: Artem Savkov Signed-off-by: Andrii Nakryiko Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/bpf/20231013054219.172920-1-asavkov@redhat.com --- kernel/trace/trace_syscalls.c | 4 ++-- tools/testing/selftests/bpf/progs/profiler.inc.h | 2 +- tools/testing/selftests/bpf/progs/test_vmlinux.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index de753403cdaf..9c581d6da843 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -556,7 +556,7 @@ static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *re { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long args[SYSCALL_DEFINE_MAXARGS]; } __aligned(8) param; int i; @@ -661,7 +661,7 @@ static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *reg { struct syscall_tp_t { struct trace_entry ent; - unsigned long syscall_nr; + int syscall_nr; unsigned long ret; } __aligned(8) param; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index f799d87e8700..897061930cb7 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -609,7 +609,7 @@ out: } SEC("tracepoint/syscalls/sys_enter_kill") -int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx) { struct bpf_func_stats_ctx stats_ctx; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 4b8e37f7fd06..78b23934d9f8 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -16,12 +16,12 @@ bool kprobe_called = false; bool fentry_called = false; SEC("tp/syscalls/sys_enter_nanosleep") -int handle__tp(struct trace_event_raw_sys_enter *args) +int handle__tp(struct syscall_trace_enter *args) { struct __kernel_timespec *ts; long tv_nsec; - if (args->id != __NR_nanosleep) + if (args->nr != __NR_nanosleep) return 0; ts = (void *)args->args[0]; -- cgit v1.2.3