diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2025-10-27 09:56:28 -0700 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2025-10-27 09:56:28 -0700 |
| commit | ff880798de39251aa5e28ab389c16d294b7af658 (patch) | |
| tree | fd8c51839c4790e65e3130c6d54c532b3496d472 /tools | |
| parent | e7586577b75f811bd14c12f2dd70afc3ece4756b (diff) | |
| parent | 784cdf931543805120aed1e1c43df8e6fa436a55 (diff) | |
Merge branch 'bpf-introduce-file-dynptr'
Mykyta Yatsenko says:
====================
bpf: Introduce file dynptr
From: Mykyta Yatsenko <yatsenko@meta.com>
This series adds a new dynptr kind, file dynptr, which enables BPF
programs to perform safe reads from files in a structured way.
Initial motivations include:
* Parsing the executable’s ELF to locate thread-local variable symbols
* Capturing stack traces when frame pointers are disabled
By leveraging the existing dynptr abstraction, we reuse the verifier’s
lifetime/size checks and keep the API consistent with existing dynptr
read helpers.
Technical details:
1. Reuses the existing freader library to read files a folio at a time.
2. bpf_dynptr_slice() and bpf_dynptr_read() always copy data from folios
into a program-provided buffer; zero-copy access is intentionally not
supported to keep it simple.
3. Reads may sleep if the requested folios are not in the page cache.
4. Few verifier changes required:
* Support dynptr destruction in kfuncs
* Add kfunc address substitution based on whether the program runs in
a sleepable or non-sleepable context.
Testing:
The final patch adds a selftest that validates BPF program reads the
same data as userspace, page faults are enabled in sleepable context and
disabled in non-sleepable.
Changelog:
---
v4 -> v5
v4: https://lore.kernel.org/all/20251021200334.220542-1-mykyta.yatsenko5@gmail.com/
* Inlined and removed kfunc_call_imm(), run overflow check for call_imm
only if !bpf_jit_supports_far_kfunc_call().
v3 -> v4
v3: https://lore.kernel.org/bpf/20251020222538.932915-1-mykyta.yatsenko5@gmail.com/
* Remove ringbuf usage from selftests
* bpf_dynptr_set_null(ptr) when discarding file dynptr
* call kfunc_call_imm() in specialize_kfunc() only, removed
call from add_kfunc_call()
v2 -> v3
v2: https://lore.kernel.org/bpf/20251015161155.120148-1-mykyta.yatsenko5@gmail.com/
* Add negative tests
* Rewrote tests to use LSM for bpf_get_task_exe_file()
* Move call_imm overflow check into kfunc_call_imm()
v1 -> v2
v1: https://lore.kernel.org/bpf/20251003160416.585080-1-mykyta.yatsenko5@gmail.com/
* Remove ELF parsing selftest
* Expanded u32 -> u64 refactoring, changes in include/uapi/linux/bpf.h
* Removed freader.{c,h}, instead move freader definitions into
buildid.h.
* Small refactoring of the multiple folios reading algorithm
* Directly return error after unmark_stack_slots_dynptr().
* Make kfuncs receive trusted arguments.
* Remove enum bpf_is_sleepable, use bool instead
* Remove unnecessary sorting from specialize_kfunc()
* Remove bool kfunc_in_sleepable_ctx; field from the struct
bpf_insn_aux_data, rely on non_sleepable field introduced by Kumar
* Refactor selftests, do madvise(...MADV_PAGEOUT) for all pages read by
the test
* Introduce the test for non-sleepable case, verify it fails with -EFAULT
====================
Link: https://lore.kernel.org/r/20251026203853.135105-1-mykyta.yatsenko5@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/include/uapi/linux/bpf.h | 8 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/bpf_kfuncs.h | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/file_reader.c | 113 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/dynptr_success.c | 12 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/file_reader.c | 145 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/file_reader_fail.c | 52 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/ip_check_defrag.c | 5 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c | 5 |
8 files changed, 326 insertions, 26 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6829936d33f5..77edd0253989 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5618,7 +5618,7 @@ union bpf_attr { * Return * *sk* if casting is valid, or **NULL** otherwise. * - * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr) * Description * Get a dynptr to local memory *data*. * @@ -5661,7 +5661,7 @@ union bpf_attr { * Return * Nothing. Always succeeds. * - * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags) + * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags) * Description * Read *len* bytes from *src* into *dst*, starting from *offset* * into *src*. @@ -5671,7 +5671,7 @@ union bpf_attr { * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if * *flags* is not 0. * - * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags) + * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags) * Description * Write *len* bytes from *src* into *dst*, starting from *offset* * into *dst*. @@ -5692,7 +5692,7 @@ union bpf_attr { * is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs, * other errors correspond to errors returned by **bpf_skb_store_bytes**\ (). * - * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len) + * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len) * Description * Get a pointer to the underlying dynptr data. * diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 794d44d19c88..e0189254bb6e 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -28,8 +28,8 @@ extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset, + void *buffer, __u64 buffer__szk) __ksym __weak; /* Description * Obtain a read-write pointer to the dynptr's data @@ -37,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer, + __u64 buffer__szk) __ksym __weak; -extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak; extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; -extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; +extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak; /* Description diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c new file mode 100644 index 000000000000..2a034d43b73e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include <network_helpers.h> +#include "file_reader.skel.h" +#include "file_reader_fail.skel.h" +#include <dlfcn.h> +#include <sys/mman.h> + +const char *user_ptr = "hello world"; +char file_contents[256000]; + +void *get_executable_base_addr(void) +{ + Dl_info info; + + if (!dladdr((void *)&get_executable_base_addr, &info)) { + fprintf(stderr, "dladdr failed\n"); + return NULL; + } + + return info.dli_fbase; +} + +static int initialize_file_contents(void) +{ + int fd, page_sz = sysconf(_SC_PAGESIZE); + ssize_t n = 0, cur, off; + void *addr; + + fd = open("/proc/self/exe", O_RDONLY); + if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n")) + return 1; + + do { + cur = read(fd, file_contents + n, sizeof(file_contents) - n); + if (!ASSERT_GT(cur, 0, "read success")) + break; + n += cur; + } while (n < sizeof(file_contents)); + + close(fd); + + if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n")) + return 1; + + addr = get_executable_base_addr(); + if (!ASSERT_NEQ(addr, NULL, "get executable address")) + return 1; + + /* page-align base file address */ + addr = (void *)((unsigned long)addr & ~(page_sz - 1)); + + for (off = 0; off < sizeof(file_contents); off += page_sz) { + if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT), + "madvise pageout")) + return errno; + } + + return 0; +} + +static void run_test(const char *prog_name) +{ + struct file_reader *skel; + struct bpf_program *prog; + int err, fd; + + err = initialize_file_contents(); + if (!ASSERT_OK(err, "initialize file contents")) + return; + + skel = file_reader__open(); + if (!ASSERT_OK_PTR(skel, "file_reader__open")) + return; + + bpf_object__for_each_program(prog, skel->obj) { + bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0); + } + + memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents)); + skel->bss->pid = getpid(); + + err = file_reader__load(skel); + if (!ASSERT_OK(err, "file_reader__load")) + goto cleanup; + + err = file_reader__attach(skel); + if (!ASSERT_OK(err, "file_reader__attach")) + goto cleanup; + + fd = open("/proc/self/exe", O_RDONLY); + if (fd >= 0) + close(fd); + + ASSERT_EQ(skel->bss->err, 0, "err"); + ASSERT_EQ(skel->bss->run_success, 1, "run_success"); +cleanup: + file_reader__destroy(skel); +} + +void test_file_reader(void) +{ + if (test__start_subtest("on_open_expect_fault")) + run_test("on_open_expect_fault"); + + if (test__start_subtest("on_open_validate_file_read")) + run_test("on_open_validate_file_read"); + + if (test__start_subtest("negative")) + RUN_TESTS(file_reader_fail); +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 127dea342e5a..e0d672d93adf 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -914,8 +914,8 @@ void *user_ptr; char expected_str[384]; __u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257}; -typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr); +typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr); /* Returns the offset just before the end of the maximum sized xdp fragment. * Any write larger than 32 bytes will be split between 2 fragments. @@ -1106,16 +1106,16 @@ int test_copy_from_user_str_dynptr(void *ctx) return 0; } -static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr) +static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task); } -static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr) +static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c new file mode 100644 index 000000000000..2585f83b0ce5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/file_reader.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "errno.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} arrmap SEC(".maps"); + +struct elem { + struct file *file; + struct bpf_task_work tw; +}; + +char user_buf[256000]; +char tmp_buf[256000]; + +int pid = 0; +int err, run_success = 0; + +static int validate_file_read(struct file *file); +static int task_work_callback(struct bpf_map *map, void *key, void *value); + +SEC("lsm/file_open") +int on_open_expect_fault(void *c) +{ + struct bpf_dynptr dynptr; + struct file *file; + int local_err = 1; + __u32 user_buf_sz = sizeof(user_buf); + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + file = bpf_get_task_exe_file(bpf_get_current_task_btf()); + if (!file) + return 0; + + if (bpf_dynptr_from_file(file, 0, &dynptr)) + goto out; + + local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, 0, 0); + if (local_err == -EFAULT) { /* Expect page fault */ + local_err = 0; + run_success = 1; + } +out: + bpf_dynptr_file_discard(&dynptr); + if (local_err) + err = local_err; + bpf_put_file(file); + return 0; +} + +SEC("lsm/file_open") +int on_open_validate_file_read(void *c) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct elem *work; + int key = 0; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) { + err = 1; + return 0; + } + bpf_task_work_schedule_signal(task, &work->tw, &arrmap, task_work_callback, NULL); + return 0; +} + +/* Called in a sleepable context, read 256K bytes, cross check with user space read data */ +static int task_work_callback(struct bpf_map *map, void *key, void *value) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct file *file = bpf_get_task_exe_file(task); + + if (!file) + return 0; + + err = validate_file_read(file); + if (!err) + run_success = 1; + bpf_put_file(file); + return 0; +} + +static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len) +{ + int i; + + if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0)) + return 1; + + /* Verify file contents read from BPF is the same as the one read from userspace */ + bpf_for(i, 0, len) + { + if (tmp_buf[i] != user_buf[i]) + return 1; + } + return 0; +} + +static int validate_file_read(struct file *file) +{ + struct bpf_dynptr dynptr; + int loc_err = 1, off; + __u32 user_buf_sz = sizeof(user_buf); + + if (bpf_dynptr_from_file(file, 0, &dynptr)) + goto cleanup; + + loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz); + off = 1; + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off); + off = user_buf_sz - 1; + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off); + /* Read file with random offset and length */ + off = 4097; + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100); + + /* Adjust dynptr, verify read */ + loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1); + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1); + /* Can't read more than 1 byte */ + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0; + /* Can't read with far offset */ + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0; + +cleanup: + bpf_dynptr_file_discard(&dynptr); + return loc_err; +} diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c new file mode 100644 index 000000000000..32fe28ed2439 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int err; +void *user_ptr; + +SEC("lsm/file_open") +__failure +__msg("Unreleased reference id=") +int on_nanosleep_unreleased_ref(void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct file *file = bpf_get_task_exe_file(task); + struct bpf_dynptr dynptr; + + if (!file) + return 0; + + err = bpf_dynptr_from_file(file, 0, &dynptr); + return err ? 1 : 0; +} + +SEC("xdp") +__failure +__msg("Expected a dynptr of type file as arg #0") +int xdp_wrong_dynptr_type(struct xdp_md *xdp) +{ + struct bpf_dynptr dynptr; + + bpf_dynptr_from_xdp(xdp, 0, &dynptr); + bpf_dynptr_file_discard(&dynptr); + return 0; +} + +SEC("xdp") +__failure +__msg("Expected an initialized dynptr as arg #0") +int xdp_no_dynptr_type(struct xdp_md *xdp) +{ + struct bpf_dynptr dynptr; + + bpf_dynptr_file_discard(&dynptr); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 645b2c9f7867..0e87ad1ebcfa 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,11 +12,6 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, - void *buffer, uint32_t buffer__sz) __ksym; - volatile int shootdowns = 0; static bool is_frag_v4(struct iphdr *iph) diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index ab9f9f2620ed..e2cbc5bda65e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, - void *buffer, uint32_t buffer__sz) __ksym; - SEC("netfilter") __description("netfilter test prog with skb and state read access") __success __failure_unpriv |
