summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm64/include/asm/cputype.h28
-rw-r--r--tools/arch/arm64/include/asm/sysreg.h3
-rw-r--r--tools/arch/loongarch/include/asm/inst.h12
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h13
-rw-r--r--tools/arch/riscv/include/asm/csr.h6
-rw-r--r--tools/arch/riscv/include/asm/vdso/processor.h4
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h10
-rw-r--r--tools/arch/x86/include/asm/inat.h15
-rw-r--r--tools/arch/x86/include/asm/insn.h51
-rw-r--r--tools/arch/x86/include/asm/msr-index.h21
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h8
-rw-r--r--tools/arch/x86/lib/inat.c13
-rw-r--r--tools/arch/x86/lib/insn.c35
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt111
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk44
-rw-r--r--tools/bootconfig/main.c4
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst13
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst14
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-token.rst64
-rw-r--r--tools/bpf/bpftool/Makefile6
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool37
-rw-r--r--tools/bpf/bpftool/btf_dumper.c2
-rw-r--r--tools/bpf/bpftool/cgroup.c4
-rw-r--r--tools/bpf/bpftool/common.c93
-rw-r--r--tools/bpf/bpftool/feature.c86
-rw-r--r--tools/bpf/bpftool/gen.c68
-rw-r--r--tools/bpf/bpftool/link.c54
-rw-r--r--tools/bpf/bpftool/main.c29
-rw-r--r--tools/bpf/bpftool/main.h21
-rw-r--r--tools/bpf/bpftool/prog.c33
-rw-r--r--tools/bpf/bpftool/sign.c211
-rw-r--r--tools/bpf/bpftool/token.c210
-rw-r--r--tools/bpf/bpftool/tracelog.c11
-rw-r--r--tools/gpio/Makefile2
-rw-r--r--tools/include/linux/args.h28
-rw-r--r--tools/include/linux/bits.h29
-rw-r--r--tools/include/linux/cfi_types.h29
-rw-r--r--tools/include/uapi/asm-generic/unistd.h8
-rw-r--r--tools/include/uapi/linux/bpf.h22
-rw-r--r--tools/include/uapi/linux/kvm.h27
-rw-r--r--tools/include/uapi/linux/nsfs.h17
-rw-r--r--tools/lib/bpf/bpf.c6
-rw-r--r--tools/lib/bpf/bpf.h5
-rw-r--r--tools/lib/bpf/bpf_gen_internal.h2
-rw-r--r--tools/lib/bpf/gen_loader.c47
-rw-r--r--tools/lib/bpf/libbpf.c213
-rw-r--r--tools/lib/bpf/libbpf.h52
-rw-r--r--tools/lib/bpf/libbpf.map3
-rw-r--r--tools/lib/bpf/libbpf_internal.h4
-rw-r--r--tools/lib/bpf/skel_internal.h76
-rw-r--r--tools/lib/bpf/usdt.bpf.h44
-rw-r--r--tools/lib/bpf/usdt.c72
-rw-r--r--tools/lib/subcmd/help.c3
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py2
-rw-r--r--tools/objtool/arch/loongarch/decode.c33
-rw-r--r--tools/objtool/arch/loongarch/special.c23
-rw-r--r--tools/objtool/arch/x86/decode.c12
-rw-r--r--tools/objtool/builtin-check.c2
-rw-r--r--tools/objtool/check.c48
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h1
-rw-r--r--tools/objtool/noreturns.h1
-rw-r--r--tools/perf/arch/arm/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/sh/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/sparc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl2
-rw-r--r--tools/perf/arch/x86/tests/topdown.c1
-rw-r--r--tools/perf/arch/xtensa/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/bench/inject-buildid.c2
-rw-r--r--tools/perf/builtin-buildid-cache.c8
-rw-r--r--tools/perf/builtin-inject.c4
-rw-r--r--tools/perf/builtin-lock.c7
-rw-r--r--tools/perf/tests/pe-file-parsing.c4
-rw-r--r--tools/perf/tests/sdt.c2
-rwxr-xr-xtools/perf/tests/shell/test_bpf_metadata.sh2
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fcntl.h18
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h88
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h9
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/vhost.h35
-rw-r--r--tools/perf/util/bpf-event.c39
-rw-r--r--tools/perf/util/bpf-utils.c61
-rw-r--r--tools/perf/util/build-id.c4
-rw-r--r--tools/perf/util/debuginfo.c8
-rw-r--r--tools/perf/util/dsos.c4
-rw-r--r--tools/perf/util/include/linux/linkage.h2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c2
-rw-r--r--tools/perf/util/maps.c9
-rw-r--r--tools/perf/util/symbol-elf.c19
-rw-r--r--tools/perf/util/symbol-minimal.c59
-rw-r--r--tools/perf/util/symbol.c8
-rw-r--r--tools/perf/util/symbol.h2
-rw-r--r--tools/perf/util/synthetic-events.c2
-rw-r--r--tools/power/cpupower/man/cpupower-set.17
-rw-r--r--tools/power/cpupower/utils/cpufreq-info.c16
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c5
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h14
-rw-r--r--tools/power/cpupower/utils/helpers/misc.c76
-rw-r--r--tools/power/x86/turbostat/turbostat.c2
-rw-r--r--tools/sched_ext/include/scx/bpf_arena_common.bpf.h175
-rw-r--r--tools/sched_ext/include/scx/bpf_arena_common.h33
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h104
-rw-r--r--tools/sched_ext/include/scx/common.h5
-rw-r--r--tools/sched_ext/include/scx/compat.bpf.h22
-rw-r--r--tools/sched_ext/include/scx/user_exit_info.bpf.h40
-rw-r--r--tools/sched_ext/include/scx/user_exit_info.h49
-rw-r--r--tools/sched_ext/include/scx/user_exit_info_common.h30
-rw-r--r--tools/sched_ext/scx_central.bpf.c2
-rw-r--r--tools/sched_ext/scx_central.c1
-rw-r--r--tools/sched_ext/scx_flatcg.bpf.c2
-rw-r--r--tools/sched_ext/scx_flatcg.c2
-rw-r--r--tools/sched_ext/scx_qmap.bpf.c98
-rw-r--r--tools/sched_ext/scx_qmap.c12
-rw-r--r--tools/sched_ext/scx_simple.c2
-rw-r--r--tools/scripts/syscall.tbl2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c22
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c8
-rw-r--r--tools/testing/selftests/arm64/bti/assembler.h1
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c6
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c6
-rw-r--r--tools/testing/selftests/arm64/fp/kernel-test.c4
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c104
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c1
-rw-r--r--tools/testing/selftests/arm64/fp/zt-ptrace.c1
-rw-r--r--tools/testing/selftests/arm64/gcs/Makefile6
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c12
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-locking.c1
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-stress.c2
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c7
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile43
-rw-r--r--tools/testing/selftests/bpf/bench.c22
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c555
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c5
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c61
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh4
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h54
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h3
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c20
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/config.aarch6412
-rw-r--r--tools/testing/selftests/bpf/config.ppc64el1
-rw-r--r--tools/testing/selftests/bpf/config.riscv641
-rw-r--r--tools/testing/selftests/bpf/config.s390x11
-rw-r--r--tools/testing/selftests/bpf/config.x86_645
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/free_timer.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c247
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_excl.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stream.c131
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h386
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_work_stress.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_work.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c156
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c484
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c121
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c222
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c179
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c9
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c5
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h24
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_test_utils.h18
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_read_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c12
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c46
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c258
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c55
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c34
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/iters_state_safety.c6
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c4
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod.c46
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod_seq.c6
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_write_ctx.c22
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c5
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie.h30
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_bench.c230
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_map.c19
-rw-r--r--tools/testing/selftests/bpf/progs/map_excl.c34
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c4
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c2
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map.c (renamed from tools/testing/selftests/bpf/progs/test_stacktrace_map.c)2
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c158
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c6
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c1
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c13
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c3
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h237
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c107
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c96
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_devmap.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_local_data.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c419
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_pull_data.c48
-rw-r--r--tools/testing/selftests/bpf/progs/timer_interrupt.c48
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c33
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c12
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall.c4
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c60
-rw-r--r--tools/testing/selftests/bpf/progs/uretprobe_stack.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c79
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c27
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c32
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c178
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_live_stack.c294
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c21
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mul.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock.c48
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c40
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/Makefile2
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c209
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c166
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h4
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_loader.c300
-rw-r--r--tools/testing/selftests/bpf/test_progs.c13
-rw-r--r--tools/testing/selftests/bpf/test_progs.h17
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c2
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c20
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c14
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c234
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/usdt.h545
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c8
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh11
-rw-r--r--tools/testing/selftests/bpf/veristat.c56
-rw-r--r--tools/testing/selftests/bpf/xdping.c2
-rw-r--r--tools/testing/selftests/bpf/xsk.h4
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c14
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c12
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h1
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c663
-rw-r--r--tools/testing/selftests/cgroup/test_pids.c3
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c3
-rw-r--r--tools/testing/selftests/damon/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile3
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh197
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh105
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config2
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py4
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_threaded.py10
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/fclog.c130
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c17
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c18
-rw-r--r--tools/testing/selftests/futex/functional/Makefile8
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c3
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c100
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c67
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c266
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c86
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c129
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c103
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c83
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c139
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c99
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh62
-rw-r--r--tools/testing/selftests/futex/include/futextest.h11
-rw-r--r--tools/testing/selftests/futex/include/logging.h148
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c2
-rw-r--r--tools/testing/selftests/kselftest.h14
-rw-r--r--tools/testing/selftests/kselftest_harness.h17
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm1
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c12
-rw-r--r--tools/testing/selftests/kvm/arm64/kvm-uuid.c70
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c4
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c6
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c9
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c2
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c6
-rw-r--r--tools/testing/selftests/mm/cow.c4
-rw-r--r--tools/testing/selftests/mm/guard-regions.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c4
-rw-r--r--tools/testing/selftests/mm/migration.c2
-rw-r--r--tools/testing/selftests/mm/mremap_test.c264
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c7
-rw-r--r--tools/testing/selftests/mm/vm_util.h2
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c77
-rw-r--r--tools/testing/selftests/namespaces/.gitignore3
-rw-r--r--tools/testing/selftests/namespaces/Makefile7
-rw-r--r--tools/testing/selftests/namespaces/config7
-rw-r--r--tools/testing/selftests/namespaces/file_handle_test.c1429
-rw-r--r--tools/testing/selftests/namespaces/init_ino_test.c61
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c986
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/bind_bhash.c4
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rw-r--r--tools/testing/selftests/net/can/config3
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh8
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c89
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c16
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh8
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh3
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c21
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c7
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh16
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh5
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh113
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c2
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh88
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt26
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
-rw-r--r--tools/testing/selftests/net/tls.c377
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h2
-rw-r--r--tools/testing/selftests/proc/.gitignore1
-rw-r--r--tools/testing/selftests/proc/Makefile1
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c6
-rw-r--r--tools/testing/selftests/proc/proc-pidns.c211
-rw-r--r--tools/testing/selftests/riscv/README24
-rw-r--r--tools/testing/selftests/rseq/rseq.c8
-rw-r--r--tools/testing/selftests/sched_ext/hotplug.c1
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c238
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json198
-rw-r--r--tools/testing/selftests/ublk/file_backed.c10
-rw-r--r--tools/testing/selftests/ublk/kublk.c42
-rw-r--r--tools/testing/selftests/ublk/kublk.h45
-rw-r--r--tools/testing/selftests/ublk/null.c4
-rw-r--r--tools/testing/selftests/ublk/stripe.c4
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh6
-rw-r--r--tools/testing/selftests/vDSO/.gitignore1
-rw-r--r--tools/testing/selftests/vDSO/Makefile2
-rw-r--r--tools/testing/selftests/vDSO/vdso_call.h7
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c101
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c123
-rw-r--r--tools/testing/shared/linux/idr.h4
-rw-r--r--tools/tracing/latency/Makefile.config8
-rw-r--r--tools/tracing/rtla/Makefile.config8
-rw-r--r--tools/tracing/rtla/src/actions.c4
425 files changed, 17823 insertions, 2898 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 9a5d85cfd1fb..139d5e87dc95 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -75,11 +75,13 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
@@ -119,9 +121,11 @@
#define QCOM_CPU_PART_KRYO 0x200
#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -129,6 +133,7 @@
#define FUJITSU_CPU_PART_A64FX 0x001
#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP09 0xD02
#define HISI_CPU_PART_HIP12 0xD06
#define APPLE_CPU_PART_M1_ICESTORM 0x022
@@ -159,11 +164,13 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
@@ -196,13 +203,26 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
+
+/*
+ * NOTES:
+ * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77
+ * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER
+ * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1
+ * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78
+ * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55
+ */
+
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
@@ -291,6 +311,14 @@ static inline u32 __attribute_const__ read_cpuid_id(void)
return read_cpuid(MIDR_EL1);
}
+struct target_impl_cpu {
+ u64 midr;
+ u64 revidr;
+ u64 aidr;
+};
+
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus);
+
static inline u64 __attribute_const__ read_cpuid_mpidr(void)
{
return read_cpuid(MPIDR_EL1);
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index 690b6ebd118f..65f2759ea27a 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -1080,9 +1080,6 @@
#define ARM64_FEATURE_FIELD_BITS 4
-/* Defined for compatibility only, do not add new users. */
-#define ARM64_FEATURE_MASK(x) (x##_MASK)
-
#ifdef __ASSEMBLY__
.macro mrs_s, rt, sreg
diff --git a/tools/arch/loongarch/include/asm/inst.h b/tools/arch/loongarch/include/asm/inst.h
index c25b5853181d..d68fad63c8b7 100644
--- a/tools/arch/loongarch/include/asm/inst.h
+++ b/tools/arch/loongarch/include/asm/inst.h
@@ -51,6 +51,10 @@ enum reg2i16_op {
bgeu_op = 0x1b,
};
+enum reg3_op {
+ amswapw_op = 0x70c0,
+};
+
struct reg0i15_format {
unsigned int immediate : 15;
unsigned int opcode : 17;
@@ -96,6 +100,13 @@ struct reg2i16_format {
unsigned int opcode : 6;
};
+struct reg3_format {
+ unsigned int rd : 5;
+ unsigned int rj : 5;
+ unsigned int rk : 5;
+ unsigned int opcode : 17;
+};
+
union loongarch_instruction {
unsigned int word;
struct reg0i15_format reg0i15_format;
@@ -105,6 +116,7 @@ union loongarch_instruction {
struct reg2i12_format reg2i12_format;
struct reg2i14_format reg2i14_format;
struct reg2i16_format reg2i16_format;
+ struct reg3_format reg3_format;
};
#define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction)
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index eaeda001784e..077c5437f521 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -1,18 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright IBM Corp. 2007
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
diff --git a/tools/arch/riscv/include/asm/csr.h b/tools/arch/riscv/include/asm/csr.h
index 0dfc09254f99..56d7367ee344 100644
--- a/tools/arch/riscv/include/asm/csr.h
+++ b/tools/arch/riscv/include/asm/csr.h
@@ -468,13 +468,13 @@
#define IE_TIE (_AC(0x1, UL) << RV_IRQ_TIMER)
#define IE_EIE (_AC(0x1, UL) << RV_IRQ_EXT)
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __ASM_STR(x) x
#else
#define __ASM_STR(x) #x
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define csr_swap(csr, val) \
({ \
@@ -536,6 +536,6 @@
: "memory"); \
})
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_RISCV_CSR_H */
diff --git a/tools/arch/riscv/include/asm/vdso/processor.h b/tools/arch/riscv/include/asm/vdso/processor.h
index 662aca039848..0665b117f30f 100644
--- a/tools/arch/riscv/include/asm/vdso/processor.h
+++ b/tools/arch/riscv/include/asm/vdso/processor.h
@@ -2,7 +2,7 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm-generic/barrier.h>
@@ -27,6 +27,6 @@ static inline void cpu_relax(void)
barrier();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index ee176236c2be..06fc0479a23f 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -218,6 +218,7 @@
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
+#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
@@ -456,10 +457,14 @@
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
+#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
+
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
@@ -487,6 +492,9 @@
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
/*
* BUG word(s)
@@ -542,5 +550,5 @@
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
-
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 183aa662b165..099e926595bd 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -37,6 +37,8 @@
#define INAT_PFX_EVEX 15 /* EVEX prefix */
/* x86-64 REX2 prefix */
#define INAT_PFX_REX2 16 /* 0xD5 */
+/* AMD XOP prefix */
+#define INAT_PFX_XOP 17 /* 0x8F */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@@ -77,6 +79,7 @@
#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_XOPOK INAT_VEXOK
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
@@ -111,6 +114,8 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
insn_byte_t vex_pp);
+extern insn_attr_t inat_get_xop_attribute(insn_byte_t opcode,
+ insn_byte_t map_select);
/* Attribute checking functions */
static inline int inat_is_legacy_prefix(insn_attr_t attr)
@@ -164,6 +169,11 @@ static inline int inat_is_vex3_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
}
+static inline int inat_is_xop_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_XOP;
+}
+
static inline int inat_is_escape(insn_attr_t attr)
{
return attr & INAT_ESC_MASK;
@@ -229,6 +239,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
return attr & INAT_VEXOK;
}
+static inline int inat_accept_xop(insn_attr_t attr)
+{
+ return attr & INAT_XOPOK;
+}
+
static inline int inat_must_vex(insn_attr_t attr)
{
return attr & (INAT_VEXONLY | INAT_EVEXONLY);
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 0e5abd896ad4..c683d609934b 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -71,7 +71,10 @@ struct insn {
* prefixes.bytes[3]: last prefix
*/
struct insn_field rex_prefix; /* REX prefix */
- struct insn_field vex_prefix; /* VEX prefix */
+ union {
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field xop_prefix; /* XOP prefix */
+ };
struct insn_field opcode; /*
* opcode.bytes[0]: opcode1
* opcode.bytes[1]: opcode2
@@ -135,6 +138,17 @@ struct insn {
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+/* XOP bit fields */
+#define X86_XOP_R(xop) ((xop) & 0x80) /* XOP Byte2 */
+#define X86_XOP_X(xop) ((xop) & 0x40) /* XOP Byte2 */
+#define X86_XOP_B(xop) ((xop) & 0x20) /* XOP Byte2 */
+#define X86_XOP_M(xop) ((xop) & 0x1f) /* XOP Byte2 */
+#define X86_XOP_W(xop) ((xop) & 0x80) /* XOP Byte3 */
+#define X86_XOP_V(xop) ((xop) & 0x78) /* XOP Byte3 */
+#define X86_XOP_L(xop) ((xop) & 0x04) /* XOP Byte3 */
+#define X86_XOP_P(xop) ((xop) & 0x03) /* XOP Byte3 */
+#define X86_XOP_M_MIN 0x08 /* Min of XOP.M */
+#define X86_XOP_M_MAX 0x1f /* Max of XOP.M */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
extern int insn_get_prefixes(struct insn *insn);
@@ -178,7 +192,7 @@ static inline insn_byte_t insn_rex2_m_bit(struct insn *insn)
return X86_REX2_M(insn->rex_prefix.bytes[1]);
}
-static inline int insn_is_avx(struct insn *insn)
+static inline int insn_is_avx_or_xop(struct insn *insn)
{
if (!insn->prefixes.got)
insn_get_prefixes(insn);
@@ -192,6 +206,22 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
+/* If we already know this is AVX/XOP encoded */
+static inline int avx_insn_is_xop(struct insn *insn)
+{
+ insn_attr_t attr = inat_get_opcode_attribute(insn->vex_prefix.bytes[0]);
+
+ return inat_is_xop_prefix(attr);
+}
+
+static inline int insn_is_xop(struct insn *insn)
+{
+ if (!insn_is_avx_or_xop(insn))
+ return 0;
+
+ return avx_insn_is_xop(insn);
+}
+
static inline int insn_has_emulate_prefix(struct insn *insn)
{
return !!insn->emulate_prefix_size;
@@ -222,11 +252,26 @@ static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
return X86_VEX_W(insn->vex_prefix.bytes[2]);
}
+static inline insn_byte_t insn_xop_map_bits(struct insn *insn)
+{
+ if (insn->xop_prefix.nbytes < 3) /* XOP is 3 bytes */
+ return 0;
+ return X86_XOP_M(insn->xop_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_xop_p_bits(struct insn *insn)
+{
+ return X86_XOP_P(insn->vex_prefix.bytes[2]);
+}
+
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
- if (insn_is_avx(insn))
+ if (insn_is_avx_or_xop(insn)) {
+ if (avx_insn_is_xop(insn))
+ return insn_xop_p_bits(insn);
return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+ }
if (insn->prefixes.bytes[3])
return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 5cfb5d74dd5f..f627196eb796 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -315,12 +315,14 @@
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
-#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
-#define PERF_CAP_ARCH_REG BIT_ULL(7)
-#define PERF_CAP_PEBS_FORMAT 0xf00
-#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
-#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
- PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
+#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
+#define PERF_CAP_ARCH_REG BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
+#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
+ PERF_CAP_PEBS_TIMING_INFO)
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
@@ -419,6 +421,7 @@
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+#define DEBUGCTLMSR_RTM_DEBUG BIT(15)
#define MSR_PEBS_FRONTEND 0x000003f7
@@ -733,6 +736,11 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+/* AMD Hardware Feedback Support MSRs */
+#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
+#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501
+#define MSR_AMD_WORKLOAD_HRST 0xc0000502
+
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
@@ -831,6 +839,7 @@
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_HWCR_IRPERF_EN_BIT 30
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
+#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
#define MSR_K7_HWCR_CPB_DIS_BIT 25
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 6f3499507c5e..0f15d683817d 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -965,7 +965,13 @@ struct kvm_tdx_cmd {
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c
index dfbcc6405941..ffcb0e27453b 100644
--- a/tools/arch/x86/lib/inat.c
+++ b/tools/arch/x86/lib/inat.c
@@ -81,3 +81,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
return table[opcode];
}
+insn_attr_t inat_get_xop_attribute(insn_byte_t opcode, insn_byte_t map_select)
+{
+ const insn_attr_t *table;
+
+ if (map_select < X86_XOP_M_MIN || map_select > X86_XOP_M_MAX)
+ return 0;
+ map_select -= X86_XOP_M_MIN;
+ /* At first, this checks the master table */
+ table = inat_xop_tables[map_select];
+ if (!table)
+ return 0;
+ return table[opcode];
+}
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index bce69c6bfa69..1d1c57c74d1f 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -200,12 +200,15 @@ found:
}
insn->rex_prefix.got = 1;
- /* Decode VEX prefix */
+ /* Decode VEX/XOP prefix */
b = peek_next(insn_byte_t, insn);
- attr = inat_get_opcode_attribute(b);
- if (inat_is_vex_prefix(attr)) {
+ if (inat_is_vex_prefix(attr) || inat_is_xop_prefix(attr)) {
insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
- if (!insn->x86_64) {
+
+ if (inat_is_xop_prefix(attr) && X86_MODRM_REG(b2) == 0) {
+ /* Grp1A.0 is always POP Ev */
+ goto vex_end;
+ } else if (!insn->x86_64) {
/*
* In 32-bits mode, if the [7:6] bits (mod bits of
* ModRM) on the second byte are not 11b, it is
@@ -226,13 +229,13 @@ found:
if (insn->x86_64 && X86_VEX_W(b2))
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
- } else if (inat_is_vex3_prefix(attr)) {
+ } else if (inat_is_vex3_prefix(attr) || inat_is_xop_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
insn_set_byte(&insn->vex_prefix, 2, b2);
insn->vex_prefix.nbytes = 3;
insn->next_byte += 3;
if (insn->x86_64 && X86_VEX_W(b2))
- /* VEX.W overrides opnd_size */
+ /* VEX.W/XOP.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
/*
@@ -288,9 +291,22 @@ int insn_get_opcode(struct insn *insn)
insn_set_byte(opcode, 0, op);
opcode->nbytes = 1;
- /* Check if there is VEX prefix or not */
- if (insn_is_avx(insn)) {
+ /* Check if there is VEX/XOP prefix or not */
+ if (insn_is_avx_or_xop(insn)) {
insn_byte_t m, p;
+
+ /* XOP prefix has different encoding */
+ if (unlikely(avx_insn_is_xop(insn))) {
+ m = insn_xop_map_bits(insn);
+ insn->attr = inat_get_xop_attribute(op, m);
+ if (!inat_accept_xop(insn->attr)) {
+ insn->attr = 0;
+ return -EINVAL;
+ }
+ /* XOP has only 1 byte for opcode */
+ goto end;
+ }
+
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
@@ -383,7 +399,8 @@ int insn_get_modrm(struct insn *insn)
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+ if (insn_is_avx_or_xop(insn) && !inat_accept_vex(insn->attr) &&
+ !inat_accept_xop(insn->attr)) {
/* Bad insn */
insn->attr = 0;
return -EINVAL;
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index 262f7ca1fb95..2a4e69ecc2de 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -27,6 +27,11 @@
# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
# (v): this opcode requires VEX prefix.
# (v1): this opcode only supports 128bit VEX.
+# (xop): this opcode accepts XOP prefix.
+#
+# XOP Superscripts
+# (W=0): this opcode requires XOP.W == 0
+# (W=1): this opcode requires XOP.W == 1
#
# Last Prefix Superscripts
# - (66): the last prefix is 0x66
@@ -194,7 +199,7 @@ AVXcode:
8c: MOV Ev,Sw
8d: LEA Gv,M
8e: MOV Sw,Ew
-8f: Grp1A (1A) | POP Ev (d64)
+8f: Grp1A (1A) | POP Ev (d64) | XOP (Prefix)
# 0x90 - 0x9f
90: NOP | PAUSE (F3) | XCHG r8,rAX
91: XCHG rCX/r9,rAX
@@ -1106,6 +1111,84 @@ AVXcode: 7
f8: URDMSR Rq,Id (F2),(v1),(11B) | UWRMSR Id,Rq (F3),(v1),(11B)
EndTable
+# From AMD64 Architecture Programmer's Manual Vol3, Appendix A.1.5
+Table: XOP map 8h
+Referrer:
+XOPcode: 0
+85: VPMACSSWW Vo,Ho,Wo,Lo
+86: VPMACSSWD Vo,Ho,Wo,Lo
+87: VPMACSSDQL Vo,Ho,Wo,Lo
+8e: VPMACSSDD Vo,Ho,Wo,Lo
+8f: VPMACSSDQH Vo,Ho,Wo,Lo
+95: VPMACSWW Vo,Ho,Wo,Lo
+96: VPMACSWD Vo,Ho,Wo,Lo
+97: VPMACSDQL Vo,Ho,Wo,Lo
+9e: VPMACSDD Vo,Ho,Wo,Lo
+9f: VPMACSDQH Vo,Ho,Wo,Lo
+a2: VPCMOV Vx,Hx,Wx,Lx (W=0) | VPCMOV Vx,Hx,Lx,Wx (W=1)
+a3: VPPERM Vo,Ho,Wo,Lo (W=0) | VPPERM Vo,Ho,Lo,Wo (W=1)
+a6: VPMADCSSWD Vo,Ho,Wo,Lo
+b6: VPMADCSWD Vo,Ho,Wo,Lo
+c0: VPROTB Vo,Wo,Ib
+c1: VPROTW Vo,Wo,Ib
+c2: VPROTD Vo,Wo,Ib
+c3: VPROTQ Vo,Wo,Ib
+cc: VPCOMccB Vo,Ho,Wo,Ib
+cd: VPCOMccW Vo,Ho,Wo,Ib
+ce: VPCOMccD Vo,Ho,Wo,Ib
+cf: VPCOMccQ Vo,Ho,Wo,Ib
+ec: VPCOMccUB Vo,Ho,Wo,Ib
+ed: VPCOMccUW Vo,Ho,Wo,Ib
+ee: VPCOMccUD Vo,Ho,Wo,Ib
+ef: VPCOMccUQ Vo,Ho,Wo,Ib
+EndTable
+
+Table: XOP map 9h
+Referrer:
+XOPcode: 1
+01: GrpXOP1
+02: GrpXOP2
+12: GrpXOP3
+80: VFRCZPS Vx,Wx
+81: VFRCZPD Vx,Wx
+82: VFRCZSS Vq,Wss
+83: VFRCZSD Vq,Wsd
+90: VPROTB Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+91: VPROTW Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+92: VPROTD Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+93: VPROTQ Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+94: VPSHLB Vo,Wo,Ho (W=0) | VPSHLB Vo,Ho,Wo (W=1)
+95: VPSHLW Vo,Wo,Ho (W=0) | VPSHLW Vo,Ho,Wo (W=1)
+96: VPSHLD Vo,Wo,Ho (W=0) | VPSHLD Vo,Ho,Wo (W=1)
+97: VPSHLQ Vo,Wo,Ho (W=0) | VPSHLQ Vo,Ho,Wo (W=1)
+98: VPSHAB Vo,Wo,Ho (W=0) | VPSHAB Vo,Ho,Wo (W=1)
+99: VPSHAW Vo,Wo,Ho (W=0) | VPSHAW Vo,Ho,Wo (W=1)
+9a: VPSHAD Vo,Wo,Ho (W=0) | VPSHAD Vo,Ho,Wo (W=1)
+9b: VPSHAQ Vo,Wo,Ho (W=0) | VPSHAQ Vo,Ho,Wo (W=1)
+c1: VPHADDBW Vo,Wo
+c2: VPHADDBD Vo,Wo
+c3: VPHADDBQ Vo,Wo
+c6: VPHADDWD Vo,Wo
+c7: VPHADDWQ Vo,Wo
+cb: VPHADDDQ Vo,Wo
+d1: VPHADDUBWD Vo,Wo
+d2: VPHADDUBD Vo,Wo
+d3: VPHADDUBQ Vo,Wo
+d6: VPHADDUWD Vo,Wo
+d7: VPHADDUWQ Vo,Wo
+db: VPHADDUDQ Vo,Wo
+e1: VPHSUBBW Vo,Wo
+e2: VPHSUBWD Vo,Wo
+e3: VPHSUBDQ Vo,Wo
+EndTable
+
+Table: XOP map Ah
+Referrer:
+XOPcode: 2
+10: BEXTR Gy,Ey,Id
+12: GrpXOP4
+EndTable
+
GrpTable: Grp1
0: ADD
1: OR
@@ -1320,3 +1403,29 @@ GrpTable: GrpRNG
4: xcrypt-cfb
5: xcrypt-ofb
EndTable
+
+# GrpXOP1-4 is shown in AMD APM Vol.3 Appendix A as XOP group #1-4
+GrpTable: GrpXOP1
+1: BLCFILL By,Ey (xop)
+2: BLSFILL By,Ey (xop)
+3: BLCS By,Ey (xop)
+4: TZMSK By,Ey (xop)
+5: BLCIC By,Ey (xop)
+6: BLSIC By,Ey (xop)
+7: T1MSKC By,Ey (xop)
+EndTable
+
+GrpTable: GrpXOP2
+1: BLCMSK By,Ey (xop)
+6: BLCI By,Ey (xop)
+EndTable
+
+GrpTable: GrpXOP3
+0: LLWPCB Ry (xop)
+1: SLWPCB Ry (xop)
+EndTable
+
+GrpTable: GrpXOP4
+0: LWPINS By,Ed,Id (xop)
+1: LWPVAL By,Ed,Id (xop)
+EndTable
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index 2c19d7fc8a85..7ea1b75e59b7 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -21,6 +21,7 @@ function clear_vars() {
eid = -1 # escape id
gid = -1 # group id
aid = -1 # AVX id
+ xopid = -1 # XOP id
tname = ""
}
@@ -39,9 +40,11 @@ BEGIN {
ggid = 1
geid = 1
gaid = 0
+ gxopid = 0
delete etable
delete gtable
delete atable
+ delete xoptable
opnd_expr = "^[A-Za-z/]"
ext_expr = "^\\("
@@ -61,6 +64,7 @@ BEGIN {
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Lo"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
@@ -87,6 +91,8 @@ BEGIN {
evexonly_expr = "\\(ev\\)"
# (es) is the same as (ev) but also "SCALABLE" i.e. W and pp determine operand size
evex_scalable_expr = "\\(es\\)"
+ # All opcodes in XOP table or with (xop) superscript accept XOP prefix
+ xopok_expr = "\\(xop\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -106,6 +112,7 @@ BEGIN {
prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
prefix_num["EVEX"] = "INAT_PFX_EVEX"
prefix_num["REX2"] = "INAT_PFX_REX2"
+ prefix_num["XOP"] = "INAT_PFX_XOP"
clear_vars()
}
@@ -147,6 +154,7 @@ function array_size(arr, i,c) {
if (NF != 1) {
# AVX/escape opcode table
aid = $2
+ xopid = -1
if (gaid <= aid)
gaid = aid + 1
if (tname == "") # AVX only opcode table
@@ -156,6 +164,20 @@ function array_size(arr, i,c) {
tname = "inat_primary_table"
}
+/^XOPcode:/ {
+ if (NF != 1) {
+ # XOP opcode table
+ xopid = $2
+ aid = -1
+ if (gxopid <= xopid)
+ gxopid = xopid + 1
+ if (tname == "") # XOP only opcode table
+ tname = sprintf("inat_xop_table_%d", $2)
+ }
+ if (xopid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
/^GrpTable:/ {
print "/* " $0 " */"
if (!($2 in group))
@@ -206,6 +228,8 @@ function print_table(tbl,name,fmt,n)
etable[eid,0] = tname
if (aid >= 0)
atable[aid,0] = tname
+ else if (xopid >= 0)
+ xoptable[xopid] = tname
}
if (array_size(lptable1) != 0) {
print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
@@ -347,6 +371,8 @@ function convert_operands(count,opnd, i,j,imm,mod)
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
flags = add_flags(flags, "INAT_VEXOK")
+ else if (match(ext, xopok_expr) || xopid >= 0)
+ flags = add_flags(flags, "INAT_XOPOK")
# check prefixes
if (match(ext, prefix_expr)) {
@@ -413,6 +439,14 @@ END {
print " ["i"]["j"] = "atable[i,j]","
print "};\n"
+ print "/* XOP opcode map array */"
+ print "const insn_attr_t * const inat_xop_tables[X86_XOP_M_MAX - X86_XOP_M_MIN + 1]" \
+ " = {"
+ for (i = 0; i < gxopid; i++)
+ if (xoptable[i])
+ print " ["i"] = "xoptable[i]","
+ print "};"
+
print "#else /* !__BOOT_COMPRESSED */\n"
print "/* Escape opcode map array */"
@@ -430,6 +464,10 @@ END {
"[INAT_LSTPFX_MAX + 1];"
print ""
+ print "/* XOP opcode map array */"
+ print "static const insn_attr_t *inat_xop_tables[X86_XOP_M_MAX - X86_XOP_M_MIN + 1];"
+ print ""
+
print "static void inat_init_tables(void)"
print "{"
@@ -455,6 +493,12 @@ END {
if (atable[i,j])
print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+ print ""
+ print "\t/* Print XOP opcode map array */"
+ for (i = 0; i < gxopid; i++)
+ if (xoptable[i])
+ print "\tinat_xop_tables["i"] = "xoptable[i]";"
+
print "}"
print "#endif"
}
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 57c669d2aa90..55d59ed507d5 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -193,7 +193,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
if (stat.st_size < BOOTCONFIG_FOOTER_SIZE)
return 0;
- if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
+ if (lseek(fd, -(off_t)BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
return pr_errno("Failed to lseek for magic", -errno);
if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
@@ -203,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
return 0;
- if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0)
+ if (lseek(fd, -(off_t)BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0)
return pr_errno("Failed to lseek for size", -errno);
if (read(fd, &size, sizeof(uint32_t)) < 0)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index ca860fd97d8d..d0a36f442db7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **gen** *COMMAND*
-*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
+*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } | [ { **-S** | **--sign** } {**-k** <private_key.pem>} **-i** <certificate.x509> ] }
*COMMAND* := { **object** | **skeleton** | **help** }
@@ -186,6 +186,17 @@ OPTIONS
skeleton). A light skeleton contains a loader eBPF program. It does not use
the majority of the libbpf infrastructure, and does not need libelf.
+-S, --sign
+ For skeletons, generate a signed skeleton. This option must be used with
+ **-k** and **-i**. Using this flag implicitly enables **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required for signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required for
+ signing.
+
EXAMPLES
========
**$ cat example1.bpf.c**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index f69fd92df8d8..009633294b09 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -18,7 +18,7 @@ SYNOPSIS
*OPTIONS* := { |COMMON_OPTIONS| |
{ **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
-{ **-L** | **--use-loader** } }
+{ **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** |
@@ -248,6 +248,18 @@ OPTIONS
creating the maps, and loading the programs (see **bpftool prog tracelog**
as a way to dump those messages).
+-S, --sign
+ Enable signing of the BPF program before loading. This option must be
+ used with **-k** and **-i**. Using this flag implicitly enables
+ **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required when signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required when
+ signing.
+
EXAMPLES
========
**# bpftool prog show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-token.rst b/tools/bpf/bpftool/Documentation/bpftool-token.rst
new file mode 100644
index 000000000000..d082c499cfe3
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-token.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+================
+bpftool-token
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF tokens
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+.. include:: substitutions.rst
+
+SYNOPSIS
+========
+
+**bpftool** [*OPTIONS*] **token** *COMMAND*
+
+*OPTIONS* := { |COMMON_OPTIONS| }
+
+*COMMANDS* := { **show** | **list** | **help** }
+
+TOKEN COMMANDS
+===============
+
+| **bpftool** **token** { **show** | **list** }
+| **bpftool** **token help**
+|
+
+DESCRIPTION
+===========
+bpftool token { show | list }
+ List BPF token information for each *bpffs* mount point containing token
+ information on the system. Information include mount point path, allowed
+ **bpf**\ () system call commands, maps, programs, and attach types for the
+ token.
+
+bpftool prog help
+ Print short help message.
+
+OPTIONS
+========
+.. include:: common_options.rst
+
+EXAMPLES
+========
+|
+| **# mkdir -p /sys/fs/bpf/token**
+| **# mount -t bpf bpffs /sys/fs/bpf/token** \
+| **-o delegate_cmds=prog_load:map_create** \
+| **-o delegate_progs=kprobe** \
+| **-o delegate_attachs=xdp**
+| **# bpftool token list**
+
+::
+
+ token_info /sys/fs/bpf/token
+ allowed_cmds:
+ map_create prog_load
+ allowed_maps:
+ allowed_progs:
+ kprobe
+ allowed_attachs:
+ xdp
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e9a5f006cd2..586d1b2595d1 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -130,8 +130,8 @@ include $(FEATURES_DUMP)
endif
endif
-LIBS = $(LIBBPF) -lelf -lz
-LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
+LIBS = $(LIBBPF) -lelf -lz -lcrypto
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto
ifeq ($(feature-libelf-zstd),1)
LIBS += -lzstd
@@ -194,7 +194,7 @@ endif
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o sign.o)
$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a759ba24471d..53bcfeb1a76e 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -262,7 +262,7 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat --debug \
- --use-loader --base-btf'
+ --use-loader --base-btf --sign -i -k'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@@ -283,7 +283,7 @@ _bpftool()
_sysfs_get_netdevs
return 0
;;
- file|pinned|-B|--base-btf)
+ file|pinned|-B|--base-btf|-i|-k)
_filedir
return 0
;;
@@ -296,13 +296,21 @@ _bpftool()
# Remove all options so completions don't have to deal with them.
local i pprev
for (( i=1; i < ${#words[@]}; )); do
- if [[ ${words[i]::1} == - ]] &&
- [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
- words=( "${words[@]:0:i}" "${words[@]:i+1}" )
- [[ $i -le $cword ]] && cword=$(( cword - 1 ))
- else
- i=$(( ++i ))
- fi
+ case ${words[i]} in
+ # Remove option and its argument
+ -B|--base-btf|-i|-k)
+ words=( "${words[@]:0:i}" "${words[@]:i+2}" )
+ [[ $i -le $(($cword + 1)) ]] && cword=$(( cword - 2 ))
+ ;;
+ # No argument, remove option only
+ -*)
+ words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+ [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+ ;;
+ *)
+ i=$(( ++i ))
+ ;;
+ esac
done
cur=${words[cword]}
prev=${words[cword - 1]}
@@ -1215,6 +1223,17 @@ _bpftool()
;;
esac
;;
+ token)
+ case $command in
+ show|list)
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 4e896d8a2416..ff12628593ae 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -38,7 +38,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
__u32 info_len = sizeof(info);
const char *prog_name = NULL;
struct btf *prog_btf = NULL;
- struct bpf_func_info finfo;
+ struct bpf_func_info finfo = {};
__u32 finfo_rec_size;
char prog_str[1024];
int err;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 944ebe21a216..ec356deb27c9 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -2,6 +2,10 @@
// Copyright (C) 2017 Facebook
// Author: Roman Gushchin <guro@fb.com>
+#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#define _XOPEN_SOURCE 500
#include <errno.h>
#include <fcntl.h>
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b07317d2842f..e8daf963ecef 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -21,6 +21,7 @@
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <sys/utsname.h>
#include <linux/filter.h>
#include <linux/limits.h>
@@ -31,6 +32,7 @@
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
#include <bpf/btf.h>
+#include <zlib.h>
#include "main.h"
@@ -1208,3 +1210,94 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
return 0;
}
+
+static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
+ char **value)
+{
+ char *sep;
+
+ while (gzgets(file, buf, n)) {
+ if (strncmp(buf, "CONFIG_", 7))
+ continue;
+
+ sep = strchr(buf, '=');
+ if (!sep)
+ continue;
+
+ /* Trim ending '\n' */
+ buf[strlen(buf) - 1] = '\0';
+
+ /* Split on '=' and ensure that a value is present. */
+ *sep = '\0';
+ if (!sep[1])
+ continue;
+
+ *value = sep + 1;
+ return true;
+ }
+
+ return false;
+}
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix)
+{
+ struct utsname utsn;
+ char path[PATH_MAX];
+ gzFile file = NULL;
+ char buf[4096];
+ char *value;
+ size_t i;
+ int ret = 0;
+
+ if (!requested_options || !out_values || num_options == 0)
+ return -1;
+
+ if (!uname(&utsn)) {
+ snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
+
+ /* gzopen also accepts uncompressed files. */
+ file = gzopen(path, "r");
+ }
+
+ if (!file) {
+ /* Some distributions build with CONFIG_IKCONFIG=y and put the
+ * config file at /proc/config.gz.
+ */
+ file = gzopen("/proc/config.gz", "r");
+ }
+
+ if (!file) {
+ p_info("skipping kernel config, can't open file: %s",
+ strerror(errno));
+ return -1;
+ }
+
+ if (!gzgets(file, buf, sizeof(buf)) || !gzgets(file, buf, sizeof(buf))) {
+ p_info("skipping kernel config, can't read from file: %s",
+ strerror(errno));
+ ret = -1;
+ goto end_parse;
+ }
+
+ if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
+ p_info("skipping kernel config, can't find correct file");
+ ret = -1;
+ goto end_parse;
+ }
+
+ while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
+ for (i = 0; i < num_options; i++) {
+ if ((define_prefix && !requested_options[i].macro_dump) ||
+ out_values[i] || strcmp(buf, requested_options[i].name))
+ continue;
+
+ out_values[i] = strdup(value);
+ }
+ }
+
+end_parse:
+ gzclose(file);
+ return ret;
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 24fecdf8e430..0f6070a0c8e7 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -10,7 +10,6 @@
#ifdef USE_LIBCAP
#include <sys/capability.h>
#endif
-#include <sys/utsname.h>
#include <sys/vfs.h>
#include <linux/filter.h>
@@ -18,7 +17,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include <zlib.h>
#include "main.h"
@@ -327,40 +325,9 @@ static void probe_jit_limit(void)
}
}
-static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
- char **value)
-{
- char *sep;
-
- while (gzgets(file, buf, n)) {
- if (strncmp(buf, "CONFIG_", 7))
- continue;
-
- sep = strchr(buf, '=');
- if (!sep)
- continue;
-
- /* Trim ending '\n' */
- buf[strlen(buf) - 1] = '\0';
-
- /* Split on '=' and ensure that a value is present. */
- *sep = '\0';
- if (!sep[1])
- continue;
-
- *value = sep + 1;
- return true;
- }
-
- return false;
-}
-
static void probe_kernel_image_config(const char *define_prefix)
{
- static const struct {
- const char * const name;
- bool macro_dump;
- } options[] = {
+ struct kernel_config_option options[] = {
/* Enable BPF */
{ "CONFIG_BPF", },
/* Enable bpf() syscall */
@@ -435,52 +402,11 @@ static void probe_kernel_image_config(const char *define_prefix)
{ "CONFIG_HZ", true, }
};
char *values[ARRAY_SIZE(options)] = { };
- struct utsname utsn;
- char path[PATH_MAX];
- gzFile file = NULL;
- char buf[4096];
- char *value;
size_t i;
- if (!uname(&utsn)) {
- snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
-
- /* gzopen also accepts uncompressed files. */
- file = gzopen(path, "r");
- }
-
- if (!file) {
- /* Some distributions build with CONFIG_IKCONFIG=y and put the
- * config file at /proc/config.gz.
- */
- file = gzopen("/proc/config.gz", "r");
- }
- if (!file) {
- p_info("skipping kernel config, can't open file: %s",
- strerror(errno));
- goto end_parse;
- }
- /* Sanity checks */
- if (!gzgets(file, buf, sizeof(buf)) ||
- !gzgets(file, buf, sizeof(buf))) {
- p_info("skipping kernel config, can't read from file: %s",
- strerror(errno));
- goto end_parse;
- }
- if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
- p_info("skipping kernel config, can't find correct file");
- goto end_parse;
- }
-
- while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
- for (i = 0; i < ARRAY_SIZE(options); i++) {
- if ((define_prefix && !options[i].macro_dump) ||
- values[i] || strcmp(buf, options[i].name))
- continue;
-
- values[i] = strdup(value);
- }
- }
+ if (read_kernel_config(options, ARRAY_SIZE(options), values,
+ define_prefix))
+ return;
for (i = 0; i < ARRAY_SIZE(options); i++) {
if (define_prefix && !options[i].macro_dump)
@@ -488,10 +414,6 @@ static void probe_kernel_image_config(const char *define_prefix)
print_kernel_option(options[i].name, values[i], define_prefix);
free(values[i]);
}
-
-end_parse:
- if (file)
- gzclose(file);
}
static bool probe_bpf_syscall(const char *define_prefix)
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 67a60114368f..993c7d9484a4 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -688,10 +688,17 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
{
DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+ struct bpf_load_and_run_opts sopts = {};
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
struct bpf_map *map;
+
char ident[256];
int err = 0;
+ if (sign_progs)
+ opts.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &opts);
if (err)
return err;
@@ -701,6 +708,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
p_err("failed to load object file");
goto out;
}
+
/* If there was no error during load then gen_loader_opts
* are populated with the loader program.
*/
@@ -780,8 +788,52 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
print_hex(opts.insns, opts.insns_sz);
codegen("\
\n\
- \"; \n\
- \n\
+ \";\n");
+
+ if (sign_progs) {
+ sopts.insns = opts.insns;
+ sopts.insns_sz = opts.insns_sz;
+ sopts.excl_prog_hash = prog_sha;
+ sopts.excl_prog_hash_sz = sizeof(prog_sha);
+ sopts.signature = sig_buf;
+ sopts.signature_sz = MAX_SIG_SIZE;
+
+ err = bpftool_prog_sign(&sopts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ codegen("\
+ \n\
+ static const char opts_sig[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)sig_buf, sopts.signature_sz);
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ static const char opts_excl_hash[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)prog_sha, sizeof(prog_sha));
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ opts.signature = (void *)opts_sig; \n\
+ opts.signature_sz = sizeof(opts_sig) - 1; \n\
+ opts.excl_prog_hash = (void *)opts_excl_hash; \n\
+ opts.excl_prog_hash_sz = sizeof(opts_excl_hash) - 1; \n\
+ opts.keyring_id = skel->keyring_id; \n\
+ ");
+ }
+
+ codegen("\
+ \n\
opts.ctx = (struct bpf_loader_ctx *)skel; \n\
opts.data_sz = sizeof(opts_data) - 1; \n\
opts.data = (void *)opts_data; \n\
@@ -1240,7 +1292,7 @@ static int do_skeleton(int argc, char **argv)
err = -errno;
libbpf_strerror(err, err_buf, sizeof(err_buf));
p_err("failed to open BPF object file: %s", err_buf);
- goto out;
+ goto out_obj;
}
bpf_object__for_each_map(map, obj) {
@@ -1355,6 +1407,13 @@ static int do_skeleton(int argc, char **argv)
printf("\t} links;\n");
}
+ if (sign_progs) {
+ codegen("\
+ \n\
+ __s32 keyring_id; \n\
+ ");
+ }
+
if (btf) {
err = codegen_datasecs(obj, obj_name);
if (err)
@@ -1552,6 +1611,7 @@ static int do_skeleton(int argc, char **argv)
err = 0;
out:
bpf_object__close(obj);
+out_obj:
if (obj_data)
munmap(obj_data, mmap_sz);
close(fd);
@@ -1930,7 +1990,7 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS " |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ]}\n"
"",
bin_name, "gen");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index a773e05d5ade..bdcd717b0348 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -282,11 +282,52 @@ get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count)
return data;
}
+static bool is_x86_ibt_enabled(void)
+{
+#if defined(__x86_64__)
+ struct kernel_config_option options[] = {
+ { "CONFIG_X86_KERNEL_IBT", },
+ };
+ char *values[ARRAY_SIZE(options)] = { };
+ bool ret;
+
+ if (read_kernel_config(options, ARRAY_SIZE(options), values, NULL))
+ return false;
+
+ ret = !!values[0];
+ free(values[0]);
+ return ret;
+#else
+ return false;
+#endif
+}
+
+static bool
+symbol_matches_target(__u64 sym_addr, __u64 target_addr, bool is_ibt_enabled)
+{
+ if (sym_addr == target_addr)
+ return true;
+
+ /*
+ * On x86_64 architectures with CET (Control-flow Enforcement Technology),
+ * function entry points have a 4-byte 'endbr' instruction prefix.
+ * This causes kprobe hooks to target the address *after* 'endbr'
+ * (symbol address + 4), preserving the CET instruction.
+ * Here we check if the symbol address matches the hook target address
+ * minus 4, indicating a CET-enabled function entry point.
+ */
+ if (is_ibt_enabled && sym_addr == target_addr - 4)
+ return true;
+
+ return false;
+}
+
static void
show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
jsonw_bool_field(json_wtr, "retprobe",
info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
@@ -306,11 +347,13 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
jsonw_start_object(json_wtr);
- jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
+ jsonw_uint_field(json_wtr, "addr", (unsigned long)data[j].addr);
jsonw_string_field(json_wtr, "func", dd.sym_mapping[i].name);
/* Print null if it is vmlinux */
if (dd.sym_mapping[i].module[0] == '\0') {
@@ -719,6 +762,7 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
if (!info->kprobe_multi.count)
return;
@@ -742,12 +786,14 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]");
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
printf("\n\t%016lx %-16llx %s",
- dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name);
+ (unsigned long)data[j].addr, data[j].cookie, dd.sym_mapping[i].name);
if (dd.sym_mapping[i].module[0] != '\0')
printf(" [%s] ", dd.sym_mapping[i].module);
else
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 2b7f2bd3a7db..a829a6a49037 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -33,6 +33,9 @@ bool relaxed_maps;
bool use_loader;
struct btf *base_btf;
struct hashmap *refs_table;
+bool sign_progs;
+const char *private_key_path;
+const char *cert_path;
static void __noreturn clean_and_exit(int i)
{
@@ -61,7 +64,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
+ " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-V|--version} }\n"
"",
@@ -87,6 +90,7 @@ static const struct cmd commands[] = {
{ "gen", do_gen },
{ "struct_ops", do_struct_ops },
{ "iter", do_iter },
+ { "token", do_token },
{ "version", do_version },
{ 0 }
};
@@ -447,6 +451,7 @@ int main(int argc, char **argv)
{ "nomount", no_argument, NULL, 'n' },
{ "debug", no_argument, NULL, 'd' },
{ "use-loader", no_argument, NULL, 'L' },
+ { "sign", no_argument, NULL, 'S' },
{ "base-btf", required_argument, NULL, 'B' },
{ 0 }
};
@@ -473,7 +478,7 @@ int main(int argc, char **argv)
bin_name = "bpftool";
opterr = 0;
- while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l",
+ while ((opt = getopt_long(argc, argv, "VhpjfLmndSi:k:B:l",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
@@ -519,6 +524,16 @@ int main(int argc, char **argv)
case 'L':
use_loader = true;
break;
+ case 'S':
+ sign_progs = true;
+ use_loader = true;
+ break;
+ case 'k':
+ private_key_path = optarg;
+ break;
+ case 'i':
+ cert_path = optarg;
+ break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
@@ -533,6 +548,16 @@ int main(int argc, char **argv)
if (argc < 0)
usage();
+ if (sign_progs && (private_key_path == NULL || cert_path == NULL)) {
+ p_err("-i <identity_x509_cert> and -k <private_key> must be supplied with -S for signing");
+ return -EINVAL;
+ }
+
+ if (!sign_progs && (private_key_path != NULL || cert_path != NULL)) {
+ p_err("--sign (or -S) must be explicitly passed with -i <identity_x509_cert> and -k <private_key> to sign the programs");
+ return -EINVAL;
+ }
+
if (version_requested)
ret = do_version(argc, argv);
else
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6db704fda5c0..1130299cede0 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -6,9 +6,14 @@
/* BFD and kernel.h both define GCC_VERSION, differently */
#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#include <stdbool.h>
#include <stdio.h>
+#include <errno.h>
#include <stdlib.h>
+#include <bpf/skel_internal.h>
#include <linux/bpf.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
@@ -52,6 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
})
#define ERR_MAX_LEN 1024
+#define MAX_SIG_SIZE 4096
#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
@@ -85,6 +91,9 @@ extern bool relaxed_maps;
extern bool use_loader;
extern struct btf *base_btf;
extern struct hashmap *refs_table;
+extern bool sign_progs;
+extern const char *private_key_path;
+extern const char *cert_path;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -166,6 +175,7 @@ int do_tracelog(int argc, char **arg) __weak;
int do_feature(int argc, char **argv) __weak;
int do_struct_ops(int argc, char **argv) __weak;
int do_iter(int argc, char **argv) __weak;
+int do_token(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
@@ -274,4 +284,15 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
/* print netfilter bpf_link info */
void netfilter_dump_plain(const struct bpf_link_info *info);
void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr);
+
+struct kernel_config_option {
+ const char *name;
+ bool macro_dump;
+};
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix);
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts);
+__u32 register_session_key(const char *key_der_path);
#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9722d841abc0..6daf19809ca4 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -23,6 +23,7 @@
#include <linux/err.h>
#include <linux/perf_event.h>
#include <linux/sizes.h>
+#include <linux/keyctl.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
@@ -714,7 +715,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
if (mode == DUMP_JITED) {
if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
- p_info("no instructions returned");
+ p_err("error retrieving jit dump: no instructions returned or kernel.kptr_restrict set?");
return -1;
}
buf = u64_to_ptr(info->jited_prog_insns);
@@ -1930,6 +1931,8 @@ static int try_loader(struct gen_loader_opts *gen)
{
struct bpf_load_and_run_opts opts = {};
struct bpf_loader_ctx *ctx;
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
sizeof(struct bpf_prog_desc));
int log_buf_sz = (1u << 24) - 1;
@@ -1953,6 +1956,26 @@ static int try_loader(struct gen_loader_opts *gen)
opts.insns = gen->insns;
opts.insns_sz = gen->insns_sz;
fds_before = count_open_fds();
+
+ if (sign_progs) {
+ opts.excl_prog_hash = prog_sha;
+ opts.excl_prog_hash_sz = sizeof(prog_sha);
+ opts.signature = sig_buf;
+ opts.signature_sz = MAX_SIG_SIZE;
+ opts.keyring_id = KEY_SPEC_SESSION_KEYRING;
+
+ err = bpftool_prog_sign(&opts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ err = register_session_key(cert_path);
+ if (err < 0) {
+ p_err("failed to add session key");
+ goto out;
+ }
+ }
err = bpf_load_and_run(&opts);
fd_delta = count_open_fds() - fds_before;
if (err < 0 || verifier_logs) {
@@ -1961,6 +1984,7 @@ static int try_loader(struct gen_loader_opts *gen)
fprintf(stderr, "loader prog leaked %d FDs\n",
fd_delta);
}
+out:
free(log_buf);
return err;
}
@@ -1988,6 +2012,9 @@ static int do_loader(int argc, char **argv)
goto err_close_obj;
}
+ if (sign_progs)
+ gen.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &gen);
if (err)
goto err_close_obj;
@@ -2262,7 +2289,7 @@ static void profile_print_readings(void)
static char *profile_target_name(int tgt_fd)
{
- struct bpf_func_info func_info;
+ struct bpf_func_info func_info = {};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const struct btf_type *t;
@@ -2562,7 +2589,7 @@ static int do_help(int argc, char **argv)
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ] \n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/sign.c b/tools/bpf/bpftool/sign.c
new file mode 100644
index 000000000000..b34f74d210e9
--- /dev/null
+++ b/tools/bpf/bpftool/sign.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2025 Google LLC.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <getopt.h>
+#include <err.h>
+#include <openssl/opensslv.h>
+#include <openssl/bio.h>
+#include <openssl/evp.h>
+#include <openssl/pem.h>
+#include <openssl/err.h>
+#include <openssl/cms.h>
+#include <linux/keyctl.h>
+#include <errno.h>
+
+#include <bpf/skel_internal.h>
+
+#include "main.h"
+
+#define OPEN_SSL_ERR_BUF_LEN 256
+
+static void display_openssl_errors(int l)
+{
+ char buf[OPEN_SSL_ERR_BUF_LEN];
+ const char *file;
+ const char *data;
+ unsigned long e;
+ int flags;
+ int line;
+
+ while ((e = ERR_get_error_all(&file, &line, NULL, &data, &flags))) {
+ ERR_error_string_n(e, buf, sizeof(buf));
+ if (data && (flags & ERR_TXT_STRING)) {
+ p_err("OpenSSL %s: %s:%d: %s", buf, file, line, data);
+ } else {
+ p_err("OpenSSL %s: %s:%d", buf, file, line);
+ }
+ }
+}
+
+#define DISPLAY_OSSL_ERR(cond) \
+ do { \
+ bool __cond = (cond); \
+ if (__cond && ERR_peek_error()) \
+ display_openssl_errors(__LINE__);\
+ } while (0)
+
+static EVP_PKEY *read_private_key(const char *pkey_path)
+{
+ EVP_PKEY *private_key = NULL;
+ BIO *b;
+
+ b = BIO_new_file(pkey_path, "rb");
+ private_key = PEM_read_bio_PrivateKey(b, NULL, NULL, NULL);
+ BIO_free(b);
+ DISPLAY_OSSL_ERR(!private_key);
+ return private_key;
+}
+
+static X509 *read_x509(const char *x509_name)
+{
+ unsigned char buf[2];
+ X509 *x509 = NULL;
+ BIO *b;
+ int n;
+
+ b = BIO_new_file(x509_name, "rb");
+ if (!b)
+ goto cleanup;
+
+ /* Look at the first two bytes of the file to determine the encoding */
+ n = BIO_read(b, buf, 2);
+ if (n != 2)
+ goto cleanup;
+
+ if (BIO_reset(b) != 0)
+ goto cleanup;
+
+ if (buf[0] == 0x30 && buf[1] >= 0x81 && buf[1] <= 0x84)
+ /* Assume raw DER encoded X.509 */
+ x509 = d2i_X509_bio(b, NULL);
+ else
+ /* Assume PEM encoded X.509 */
+ x509 = PEM_read_bio_X509(b, NULL, NULL, NULL);
+
+cleanup:
+ BIO_free(b);
+ DISPLAY_OSSL_ERR(!x509);
+ return x509;
+}
+
+__u32 register_session_key(const char *key_der_path)
+{
+ unsigned char *der_buf = NULL;
+ X509 *x509 = NULL;
+ int key_id = -1;
+ int der_len;
+
+ if (!key_der_path)
+ return key_id;
+ x509 = read_x509(key_der_path);
+ if (!x509)
+ goto cleanup;
+ der_len = i2d_X509(x509, &der_buf);
+ if (der_len < 0)
+ goto cleanup;
+ key_id = syscall(__NR_add_key, "asymmetric", key_der_path, der_buf,
+ (size_t)der_len, KEY_SPEC_SESSION_KEYRING);
+cleanup:
+ X509_free(x509);
+ OPENSSL_free(der_buf);
+ DISPLAY_OSSL_ERR(key_id == -1);
+ return key_id;
+}
+
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts)
+{
+ BIO *bd_in = NULL, *bd_out = NULL;
+ EVP_PKEY *private_key = NULL;
+ CMS_ContentInfo *cms = NULL;
+ long actual_sig_len = 0;
+ X509 *x509 = NULL;
+ int err = 0;
+
+ bd_in = BIO_new_mem_buf(opts->insns, opts->insns_sz);
+ if (!bd_in) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ private_key = read_private_key(private_key_path);
+ if (!private_key) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ x509 = read_x509(cert_path);
+ if (!x509) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ cms = CMS_sign(NULL, NULL, NULL, NULL,
+ CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED |
+ CMS_STREAM);
+ if (!cms) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!CMS_add1_signer(cms, x509, private_key, EVP_sha256(),
+ CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP |
+ CMS_USE_KEYID | CMS_NOATTR)) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (CMS_final(cms, bd_in, NULL, CMS_NOCERTS | CMS_BINARY) != 1) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ EVP_Digest(opts->insns, opts->insns_sz, opts->excl_prog_hash,
+ &opts->excl_prog_hash_sz, EVP_sha256(), NULL);
+
+ bd_out = BIO_new(BIO_s_mem());
+ if (!bd_out) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ if (!i2d_CMS_bio_stream(bd_out, cms, NULL, 0)) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ actual_sig_len = BIO_get_mem_data(bd_out, NULL);
+ if (actual_sig_len <= 0) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ if ((size_t)actual_sig_len > opts->signature_sz) {
+ err = -ENOSPC;
+ goto cleanup;
+ }
+
+ if (BIO_read(bd_out, opts->signature, actual_sig_len) != actual_sig_len) {
+ err = -EIO;
+ goto cleanup;
+ }
+
+ opts->signature_sz = actual_sig_len;
+cleanup:
+ BIO_free(bd_out);
+ CMS_ContentInfo_free(cms);
+ X509_free(x509);
+ EVP_PKEY_free(private_key);
+ BIO_free(bd_in);
+ DISPLAY_OSSL_ERR(err < 0);
+ return err;
+}
diff --git a/tools/bpf/bpftool/token.c b/tools/bpf/bpftool/token.c
new file mode 100644
index 000000000000..c08f34b9d51b
--- /dev/null
+++ b/tools/bpf/bpftool/token.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2025 Didi Technology Co., Tao Chen */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mntent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define MOUNTS_FILE "/proc/mounts"
+
+static struct {
+ const char *header;
+ const char *key;
+} sets[] = {
+ {"allowed_cmds", "delegate_cmds"},
+ {"allowed_maps", "delegate_maps"},
+ {"allowed_progs", "delegate_progs"},
+ {"allowed_attachs", "delegate_attachs"},
+};
+
+static bool has_delegate_options(const char *mnt_ops)
+{
+ return strstr(mnt_ops, "delegate_cmds") ||
+ strstr(mnt_ops, "delegate_maps") ||
+ strstr(mnt_ops, "delegate_progs") ||
+ strstr(mnt_ops, "delegate_attachs");
+}
+
+static char *get_delegate_value(char *opts, const char *key)
+{
+ char *token, *rest, *ret = NULL;
+
+ if (!opts)
+ return NULL;
+
+ for (token = strtok_r(opts, ",", &rest); token;
+ token = strtok_r(NULL, ",", &rest)) {
+ if (strncmp(token, key, strlen(key)) == 0 &&
+ token[strlen(key)] == '=') {
+ ret = token + strlen(key) + 1;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void print_items_per_line(char *input, int items_per_line)
+{
+ char *str, *rest;
+ int cnt = 0;
+
+ if (!input)
+ return;
+
+ for (str = strtok_r(input, ":", &rest); str;
+ str = strtok_r(NULL, ":", &rest)) {
+ if (cnt % items_per_line == 0)
+ printf("\n\t ");
+
+ printf("%-20s", str);
+ cnt++;
+ }
+}
+
+#define ITEMS_PER_LINE 4
+static void show_token_info_plain(struct mntent *mntent)
+{
+ size_t i;
+
+ printf("token_info %s", mntent->mnt_dir);
+
+ for (i = 0; i < ARRAY_SIZE(sets); i++) {
+ char *opts, *value;
+
+ printf("\n\t%s:", sets[i].header);
+ opts = strdup(mntent->mnt_opts);
+ value = get_delegate_value(opts, sets[i].key);
+ print_items_per_line(value, ITEMS_PER_LINE);
+ free(opts);
+ }
+
+ printf("\n");
+}
+
+static void split_json_array_str(char *input)
+{
+ char *str, *rest;
+
+ if (!input) {
+ jsonw_start_array(json_wtr);
+ jsonw_end_array(json_wtr);
+ return;
+ }
+
+ jsonw_start_array(json_wtr);
+ for (str = strtok_r(input, ":", &rest); str;
+ str = strtok_r(NULL, ":", &rest)) {
+ jsonw_string(json_wtr, str);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+static void show_token_info_json(struct mntent *mntent)
+{
+ size_t i;
+
+ jsonw_start_object(json_wtr);
+ jsonw_string_field(json_wtr, "token_info", mntent->mnt_dir);
+
+ for (i = 0; i < ARRAY_SIZE(sets); i++) {
+ char *opts, *value;
+
+ jsonw_name(json_wtr, sets[i].header);
+ opts = strdup(mntent->mnt_opts);
+ value = get_delegate_value(opts, sets[i].key);
+ split_json_array_str(value);
+ free(opts);
+ }
+
+ jsonw_end_object(json_wtr);
+}
+
+static int __show_token_info(struct mntent *mntent)
+{
+ if (json_output)
+ show_token_info_json(mntent);
+ else
+ show_token_info_plain(mntent);
+
+ return 0;
+}
+
+static int show_token_info(void)
+{
+ FILE *fp;
+ struct mntent *ent;
+
+ fp = setmntent(MOUNTS_FILE, "r");
+ if (!fp) {
+ p_err("Failed to open: %s", MOUNTS_FILE);
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ while ((ent = getmntent(fp)) != NULL) {
+ if (strncmp(ent->mnt_type, "bpf", 3) == 0) {
+ if (has_delegate_options(ent->mnt_opts))
+ __show_token_info(ent);
+ }
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ endmntent(fp);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ if (argc)
+ return BAD_ARG();
+
+ return show_token_info();
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %1$s %2$s { show | list }\n"
+ " %1$s %2$s help\n"
+ " " HELP_SPEC_OPTIONS " }\n"
+ "\n"
+ "",
+ bin_name, argv[-2]);
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_token(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c
index 31d806e3bdaa..573a8d99f009 100644
--- a/tools/bpf/bpftool/tracelog.c
+++ b/tools/bpf/bpftool/tracelog.c
@@ -57,10 +57,8 @@ find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt)
static bool get_tracefs_pipe(char *mnt)
{
static const char * const known_mnts[] = {
- "/sys/kernel/debug/tracing",
"/sys/kernel/tracing",
- "/tracing",
- "/trace",
+ "/sys/kernel/debug/tracing",
};
const char *pipe_name = "/trace_pipe";
const char *fstype = "tracefs";
@@ -95,12 +93,7 @@ static bool get_tracefs_pipe(char *mnt)
return false;
p_info("could not find tracefs, attempting to mount it now");
- /* Most of the time, tracefs is automatically mounted by debugfs at
- * /sys/kernel/debug/tracing when we try to access it. If we could not
- * find it, it is likely that debugfs is not mounted. Let's give one
- * attempt at mounting just tracefs at /sys/kernel/tracing.
- */
- strcpy(mnt, known_mnts[1]);
+ strcpy(mnt, known_mnts[0]);
if (mount_tracefs(mnt))
return false;
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index ed565eb52275..342e056c8c66 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -77,7 +77,7 @@ $(OUTPUT)gpio-watch: $(GPIO_WATCH_IN)
clean:
rm -f $(ALL_PROGRAMS)
- rm -f $(OUTPUT)include/linux/gpio.h
+ rm -rf $(OUTPUT)include
find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
install: $(ALL_PROGRAMS)
diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h
new file mode 100644
index 000000000000..2e8e65d975c7
--- /dev/null
+++ b/tools/include/linux/args.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_ARGS_H
+#define _LINUX_ARGS_H
+
+/*
+ * How do these macros work?
+ *
+ * In __COUNT_ARGS() _0 to _12 are just placeholders from the start
+ * in order to make sure _n is positioned over the correct number
+ * from 12 to 0 (depending on X, which is a variadic argument list).
+ * They serve no purpose other than occupying a position. Since each
+ * macro parameter must have a distinct identifier, those identifiers
+ * are as good as any.
+ *
+ * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns
+ * that as _n.
+ */
+
+/* This counts to 15. Any more, it will return 16th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+/* Concatenate two parameters, but allow them to be expanded beforehand. */
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#endif /* _LINUX_ARGS_H */
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7ad056219115..a40cc861b3a7 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -2,10 +2,8 @@
#ifndef __LINUX_BITS_H
#define __LINUX_BITS_H
-#include <linux/const.h>
#include <vdso/bits.h>
#include <uapi/linux/bits.h>
-#include <asm/bitsperlong.h>
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
@@ -50,10 +48,14 @@
(type_max(t) << (l) & \
type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
+#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l)
+#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l)
+
#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l)
#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l)
#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l)
#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l)
+#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l)
/*
* Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
@@ -79,28 +81,9 @@
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
* disable the input check if that is the case.
*/
-#define GENMASK_INPUT_CHECK(h, l) 0
+#define GENMASK(h, l) __GENMASK(h, l)
+#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l)
#endif /* !defined(__ASSEMBLY__) */
-#define GENMASK(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-#define GENMASK_ULL(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
-
-#if !defined(__ASSEMBLY__)
-/*
- * Missing asm support
- *
- * __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __int128' data
- * type instead of direct representation of 128 bit constants
- * such as long and unsigned long. The fundamental problem is
- * that a 128 bit constant will get silently truncated by the
- * gcc compiler.
- */
-#define GENMASK_U128(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l))
-#endif
-
#endif /* __LINUX_BITS_H */
diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h
index 6b8713675765..fb8d90bff92e 100644
--- a/tools/include/linux/cfi_types.h
+++ b/tools/include/linux/cfi_types.h
@@ -8,7 +8,7 @@
#ifdef __ASSEMBLY__
#include <linux/linkage.h>
-#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_CFI
/*
* Use the __kcfi_typeid_<function> type identifier symbol to
* annotate indirectly called assembly functions. The compiler emits
@@ -29,17 +29,40 @@
#define SYM_TYPED_START(name, linkage, align...) \
SYM_TYPED_ENTRY(name, linkage, align)
-#else /* CONFIG_CFI_CLANG */
+#else /* CONFIG_CFI */
#define SYM_TYPED_START(name, linkage, align...) \
SYM_START(name, linkage, align)
-#endif /* CONFIG_CFI_CLANG */
+#endif /* CONFIG_CFI */
#ifndef SYM_TYPED_FUNC_START
#define SYM_TYPED_FUNC_START(name) \
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_CFI_CLANG
+#define DEFINE_CFI_TYPE(name, func) \
+ /* \
+ * Force a reference to the function so the compiler generates \
+ * __kcfi_typeid_<func>. \
+ */ \
+ __ADDRESSABLE(func); \
+ /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \
+ extern u32 name; \
+ asm ( \
+ " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \
+ " .type " #name ",\%object \n" \
+ " .globl " #name " \n" \
+ " .p2align 2, 0x0 \n" \
+ #name ": \n" \
+ " .4byte __kcfi_typeid_" #func " \n" \
+ " .size " #name ", 4 \n" \
+ " .popsection \n" \
+ );
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_CFI_TYPES_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 2892a45023af..04e0077fb4c9 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat)
#define __NR_open_tree_attr 467
__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+/* fs/inode.c */
+#define __NR_file_getattr 468
+__SYSCALL(__NR_file_getattr, sys_file_getattr)
+#define __NR_file_setattr 469
+__SYSCALL(__NR_file_setattr, sys_file_setattr)
+
#undef __NR_syscalls
-#define __NR_syscalls 468
+#define __NR_syscalls 470
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382..ae83d8649ef1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1522,6 +1522,12 @@ union bpf_attr {
* If provided, map_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 map_token_fd;
+
+ /* Hash of the program that has exclusive access to the map.
+ */
+ __aligned_u64 excl_prog_hash;
+ /* Size of the passed excl_prog_hash. */
+ __u32 excl_prog_hash_size;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
@@ -1605,6 +1611,16 @@ union bpf_attr {
* continuous.
*/
__u32 fd_array_cnt;
+ /* Pointer to a buffer containing the signature of the BPF
+ * program.
+ */
+ __aligned_u64 signature;
+ /* Size of the signature buffer in bytes. */
+ __u32 signature_size;
+ /* ID of the kernel keyring to be used for signature
+ * verification.
+ */
+ __s32 keyring_id;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -6666,6 +6682,8 @@ struct bpf_map_info {
__u32 btf_value_type_id;
__u32 btf_vmlinux_id;
__u64 map_extra;
+ __aligned_u64 hash;
+ __u32 hash_size;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -7418,6 +7436,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_task_work {
+ __u64 __opaque;
+} __attribute__((aligned(8)));
+
struct bpf_wq {
__u64 __opaque[2];
} __attribute__((aligned(8)));
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7415a3863891..f0f0d49d2544 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -178,6 +178,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39
+#define KVM_EXIT_TDX 40
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -447,6 +448,31 @@ struct kvm_run {
__u64 gpa;
__u64 size;
} memory_fault;
+ /* KVM_EXIT_TDX */
+ struct {
+ __u64 flags;
+ __u64 nr;
+ union {
+ struct {
+ __u64 ret;
+ __u64 data[5];
+ } unknown;
+ struct {
+ __u64 ret;
+ __u64 gpa;
+ __u64 size;
+ } get_quote;
+ struct {
+ __u64 ret;
+ __u64 leaf;
+ __u64 r11, r12, r13, r14;
+ } get_tdvmcall_info;
+ struct {
+ __u64 ret;
+ __u64 vector;
+ } setup_event_notify;
+ };
+ } tdx;
/* Fix the size of the union. */
char padding[256];
};
@@ -935,6 +961,7 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_EL2 240
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
+#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
struct kvm_irq_routing_irqchip {
__u32 irqchip;
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
index 34127653fd00..33c9b578b3b2 100644
--- a/tools/include/uapi/linux/nsfs.h
+++ b/tools/include/uapi/linux/nsfs.h
@@ -16,8 +16,6 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
-/* Get the id for a mount namespace */
-#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
/* Translate pid from target pid namespace into the caller's pid namespace. */
#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
/* Return thread-group leader id of pid in the callers pid namespace. */
@@ -42,4 +40,19 @@ struct mnt_ns_info {
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+/* Retrieve namespace identifiers. */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64)
+#define NS_GET_ID _IOR(NSIO, 13, __u64)
+
+enum init_ns_ino {
+ IPC_NS_INIT_INO = 0xEFFFFFFFU,
+ UTS_NS_INIT_INO = 0xEFFFFFFEU,
+ USER_NS_INIT_INO = 0xEFFFFFFDU,
+ PID_NS_INIT_INO = 0xEFFFFFFCU,
+ CGROUP_NS_INIT_INO = 0xEFFFFFFBU,
+ TIME_NS_INIT_INO = 0xEFFFFFFAU,
+ NET_NS_INIT_INO = 0xEFFFFFF9U,
+ MNT_NS_INIT_INO = 0xEFFFFFF8U,
+};
+
#endif /* __LINUX_NSFS_H */
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index ab40dbf9f020..339b19797237 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -172,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
__u32 max_entries,
const struct bpf_map_create_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
int fd;
@@ -203,6 +203,8 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+ attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
+ attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
@@ -238,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt);
+ const size_t attr_sz = offsetofend(union bpf_attr, keyring_id);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 7252150e7ad3..e983a3e40d61 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -54,9 +54,12 @@ struct bpf_map_create_opts {
__s32 value_type_btf_obj_fd;
__u32 token_fd;
+
+ const void *excl_prog_hash;
+ __u32 excl_prog_hash_size;
size_t :0;
};
-#define bpf_map_create_opts__last_field token_fd
+#define bpf_map_create_opts__last_field excl_prog_hash_size
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 6ff963a491d9..49af4260b8e6 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -4,6 +4,7 @@
#define __BPF_GEN_INTERNAL_H
#include "bpf.h"
+#include "libbpf_internal.h"
struct ksym_relo_desc {
const char *name;
@@ -50,6 +51,7 @@ struct bpf_gen {
__u32 nr_ksyms;
int fd_array;
int nr_fd_array;
+ int hash_insn_offset[SHA256_DWORD_SIZE];
};
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 113ae4abd345..6945dd99a846 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -110,6 +110,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+static void emit_signature_match(struct bpf_gen *gen);
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
{
@@ -152,6 +153,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps
/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ emit_signature_match(gen);
}
static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
@@ -368,6 +371,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
__emit_sys_close(gen);
}
+static void compute_sha_update_offsets(struct bpf_gen *gen);
+
int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
{
int i;
@@ -394,6 +399,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
blob_fd_array_off(gen, i));
emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ compute_sha_update_offsets(gen);
+
pr_debug("gen: finish %s\n", errstr(gen->error));
if (!gen->error) {
struct gen_loader_opts *opts = gen->opts;
@@ -446,6 +454,22 @@ void bpf_gen__free(struct bpf_gen *gen)
_val; \
})
+static void compute_sha_update_offsets(struct bpf_gen *gen)
+{
+ __u64 sha[SHA256_DWORD_SIZE];
+ __u64 sha_dw;
+ int i;
+
+ libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha);
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ struct bpf_insn *insn =
+ (struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]);
+ sha_dw = tgt_endian(sha[i]);
+ insn[0].imm = (__u32)sha_dw;
+ insn[1].imm = sha_dw >> 32;
+ }
+}
+
void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
__u32 btf_raw_size)
{
@@ -557,6 +581,29 @@ void bpf_gen__map_create(struct bpf_gen *gen,
emit_sys_close_stack(gen, stack_off(inner_map_fd));
}
+static void emit_signature_match(struct bpf_gen *gen)
+{
+ __s64 off;
+ int i;
+
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX,
+ 0, 0, 0, 0));
+ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64)));
+ gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start;
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0));
+
+ off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+ if (is_simm16(off)) {
+ emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL));
+ emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off));
+ } else {
+ gen->error = -ERANGE;
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+ }
+ }
+}
+
void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
enum bpf_attach_type type)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8f5a81b672e1..f92083f51bdb 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -35,6 +35,7 @@
#include <linux/perf_event.h>
#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
+#include <linux/unaligned.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
@@ -496,6 +497,7 @@ struct bpf_program {
__u32 line_info_rec_size;
__u32 line_info_cnt;
__u32 prog_flags;
+ __u8 hash[SHA256_DIGEST_LENGTH];
};
struct bpf_struct_ops {
@@ -575,6 +577,7 @@ struct bpf_map {
bool autocreate;
bool autoattach;
__u64 map_extra;
+ struct bpf_program *excl_prog;
};
enum extern_type {
@@ -1013,35 +1016,33 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
const struct btf_member *kern_data_member;
struct btf *btf = NULL;
__s32 kern_vtype_id, kern_type_id;
- char tname[256];
+ char tname[192], stname[256];
__u32 i;
snprintf(tname, sizeof(tname), "%.*s",
(int)bpf_core_essential_name_len(tname_raw), tname_raw);
- kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
- &btf, mod_btf);
- if (kern_type_id < 0) {
- pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
- tname);
- return kern_type_id;
- }
- kern_type = btf__type_by_id(btf, kern_type_id);
+ snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);
- /* Find the corresponding "map_value" type that will be used
- * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
- * find "struct bpf_struct_ops_tcp_congestion_ops" from the
- * btf_vmlinux.
+ /* Look for the corresponding "map_value" type that will be used
+ * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf
+ * and the mod_btf.
+ * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
*/
- kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
- tname, BTF_KIND_STRUCT);
+ kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
if (kern_vtype_id < 0) {
- pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
- STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
return kern_vtype_id;
}
kern_vtype = btf__type_by_id(btf, kern_vtype_id);
+ kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (kern_type_id < 0) {
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
+ return kern_type_id;
+ }
+ kern_type = btf__type_by_id(btf, kern_type_id);
+
/* Find "struct tcp_congestion_ops" from
* struct bpf_struct_ops_tcp_congestion_ops {
* [ ... ]
@@ -1054,8 +1055,8 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
break;
}
if (i == btf_vlen(kern_vtype)) {
- pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
- tname, STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
+ tname, stname);
return -EINVAL;
}
@@ -4485,6 +4486,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
}
}
+static int bpf_prog_compute_hash(struct bpf_program *prog)
+{
+ struct bpf_insn *purged;
+ int i, err = 0;
+
+ purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
+ if (!purged)
+ return -ENOMEM;
+
+ /* If relocations have been done, the map_fd needs to be
+ * discarded for the digest calculation.
+ */
+ for (i = 0; i < prog->insns_cnt; i++) {
+ purged[i] = prog->insns[i];
+ if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+ (purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
+ purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
+ purged[i].imm = 0;
+ i++;
+ if (i >= prog->insns_cnt ||
+ prog->insns[i].code != 0 ||
+ prog->insns[i].dst_reg != 0 ||
+ prog->insns[i].src_reg != 0 ||
+ prog->insns[i].off != 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ purged[i] = prog->insns[i];
+ purged[i].imm = 0;
+ }
+ }
+ libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
+ prog->hash);
+out:
+ free(purged);
+ return err;
+}
+
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
__u32 insn_idx, const char *sym_name,
@@ -5093,6 +5132,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
return false;
}
+ /*
+ * bpf_get_map_info_by_fd() for DEVMAP will always return flags with
+ * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
+ * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
+ * bpf_get_map_info_by_fd() when checking for compatibility with an
+ * existing DEVMAP.
+ */
+ if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
+ map_info.map_flags &= ~BPF_F_RDONLY_PROG;
+
return (map_info.type == map->def.type &&
map_info.key_size == map->def.key_size &&
map_info.value_size == map->def.value_size &&
@@ -5224,6 +5273,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.token_fd = obj->token_fd;
if (obj->token_fd)
create_attr.map_flags |= BPF_F_TOKEN_FD;
+ if (map->excl_prog) {
+ err = bpf_prog_compute_hash(map->excl_prog);
+ if (err)
+ return err;
+
+ create_attr.excl_prog_hash = map->excl_prog->hash;
+ create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
+ }
if (bpf_map__is_struct_ops(map)) {
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
@@ -10514,6 +10571,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
return 0;
}
+int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
+{
+ if (map_is_created(map)) {
+ pr_warn("exclusive programs must be set before map creation\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ if (map->obj != prog->obj) {
+ pr_warn("excl_prog and map must be from the same bpf object\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ map->excl_prog = prog;
+ return 0;
+}
+
+struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
+{
+ return map->excl_prog;
+}
+
static struct bpf_map *
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
{
@@ -14207,3 +14285,100 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
free(s->progs);
free(s);
}
+
+static inline __u32 ror32(__u32 v, int bits)
+{
+ return (v >> bits) | (v << (32 - bits));
+}
+
+#define SHA256_BLOCK_LENGTH 64
+#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
+#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22))
+#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25))
+#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3))
+#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10))
+
+static const __u32 sha256_K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+ 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+ 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+ 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+ 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+ 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \
+ { \
+ __u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \
+ d += tmp; \
+ h = tmp + Sigma_0(a) + Maj(a, b, c); \
+ }
+
+static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks)
+{
+ while (nblocks--) {
+ __u32 a = state[0];
+ __u32 b = state[1];
+ __u32 c = state[2];
+ __u32 d = state[3];
+ __u32 e = state[4];
+ __u32 f = state[5];
+ __u32 g = state[6];
+ __u32 h = state[7];
+ __u32 w[64];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ w[i] = get_unaligned_be32(&data[4 * i]);
+ for (; i < ARRAY_SIZE(w); i++)
+ w[i] = sigma_1(w[i - 2]) + w[i - 7] +
+ sigma_0(w[i - 15]) + w[i - 16];
+ for (i = 0; i < ARRAY_SIZE(w); i += 8) {
+ SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
+ SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
+ SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
+ SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
+ SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
+ SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
+ SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
+ SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
+ }
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ state[5] += f;
+ state[6] += g;
+ state[7] += h;
+ data += SHA256_BLOCK_LENGTH;
+ }
+}
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH])
+{
+ __u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
+ const __be64 bitcount = cpu_to_be64((__u64)len * 8);
+ __u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 };
+ size_t final_len = len % SHA256_BLOCK_LENGTH;
+ int i;
+
+ sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH);
+
+ memcpy(final_data, data + len - final_len, final_len);
+ final_data[final_len] = 0x80;
+ final_len = round_up(final_len + 9, SHA256_BLOCK_LENGTH);
+ memcpy(&final_data[final_len - 8], &bitcount, 8);
+
+ sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH);
+
+ for (i = 0; i < ARRAY_SIZE(state); i++)
+ put_unaligned_be32(state[i], &out[4 * i]);
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 455a957cb702..5118d0a90e24 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -24,8 +24,25 @@
extern "C" {
#endif
+/**
+ * @brief **libbpf_major_version()** provides the major version of libbpf.
+ * @return An integer, the major version number
+ */
LIBBPF_API __u32 libbpf_major_version(void);
+
+/**
+ * @brief **libbpf_minor_version()** provides the minor version of libbpf.
+ * @return An integer, the minor version number
+ */
LIBBPF_API __u32 libbpf_minor_version(void);
+
+/**
+ * @brief **libbpf_version_string()** provides the version of libbpf in a
+ * human-readable form, e.g., "v1.7".
+ * @return Pointer to a static string containing the version
+ *
+ * The format is *not* a part of a stable API and may change in the future.
+ */
LIBBPF_API const char *libbpf_version_string(void);
enum libbpf_errno {
@@ -49,6 +66,14 @@ enum libbpf_errno {
__LIBBPF_ERRNO__END,
};
+/**
+ * @brief **libbpf_strerror()** converts the provided error code into a
+ * human-readable string.
+ * @param err The error code to convert
+ * @param buf Pointer to a buffer where the error message will be stored
+ * @param size The number of bytes in the buffer
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
/**
@@ -252,7 +277,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
* @return 0, on success; negative error code, otherwise, error code is
* stored in errno
*/
-int bpf_object__prepare(struct bpf_object *obj);
+LIBBPF_API int bpf_object__prepare(struct bpf_object *obj);
/**
* @brief **bpf_object__load()** loads BPF object into kernel.
@@ -1266,6 +1291,28 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
*/
LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
const void *cur_key, void *next_key, size_t key_sz);
+/**
+ * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the
+ * specified program. This must be called *before* the map is created.
+ *
+ * @param map BPF map to make exclusive.
+ * @param prog BPF program to be the exclusive user of the map. Must belong
+ * to the same bpf_object as the map.
+ * @return 0 on success; a negative error code otherwise.
+ *
+ * This function must be called after the BPF object is opened but before
+ * it is loaded. Once the object is loaded, only the specified program
+ * will be able to access the map's contents.
+ */
+LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog);
+
+/**
+ * @brief **bpf_map__exclusive_program()** returns the exclusive program
+ * that is registered with the map (if any).
+ * @param map BPF map to which the exclusive program is registered.
+ * @return the registered exclusive program.
+ */
+LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map);
struct bpf_xdp_set_link_opts {
size_t sz;
@@ -1810,9 +1857,10 @@ struct gen_loader_opts {
const char *insns;
__u32 data_sz;
__u32 insns_sz;
+ bool gen_hash;
};
-#define gen_loader_opts__last_field insns_sz
+#define gen_loader_opts__last_field gen_hash
LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
struct gen_loader_opts *opts);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d7bd463e7017..8ed8749907d4 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -448,4 +448,7 @@ LIBBPF_1.6.0 {
} LIBBPF_1.5.0;
LIBBPF_1.7.0 {
+ global:
+ bpf_map__set_exclusive_program;
+ bpf_map__exclusive_program;
} LIBBPF_1.6.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 477a3b3389a0..c93797dcaf5b 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -736,4 +736,8 @@ int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
int probe_fd(int fd);
+#define SHA256_DIGEST_LENGTH 32
+#define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64)
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 4d5fa079b5d6..6a8f5c7a02eb 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -13,10 +13,15 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/mman.h>
+#include <linux/keyctl.h>
#include <stdlib.h>
#include "bpf.h"
#endif
+#ifndef SHA256_DIGEST_LENGTH
+#define SHA256_DIGEST_LENGTH 32
+#endif
+
#ifndef __NR_bpf
# if defined(__mips__) && defined(_ABIO32)
# define __NR_bpf 4355
@@ -64,6 +69,11 @@ struct bpf_load_and_run_opts {
__u32 data_sz;
__u32 insns_sz;
const char *errstr;
+ void *signature;
+ __u32 signature_sz;
+ __s32 keyring_id;
+ void *excl_prog_hash;
+ __u32 excl_prog_hash_sz;
};
long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
@@ -220,14 +230,19 @@ static inline int skel_map_create(enum bpf_map_type map_type,
const char *map_name,
__u32 key_size,
__u32 value_size,
- __u32 max_entries)
+ __u32 max_entries,
+ const void *excl_prog_hash,
+ __u32 excl_prog_hash_sz)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
memset(&attr, 0, attr_sz);
attr.map_type = map_type;
+ attr.excl_prog_hash = (unsigned long) excl_prog_hash;
+ attr.excl_prog_hash_size = excl_prog_hash_sz;
+
strncpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -300,6 +315,35 @@ static inline int skel_link_create(int prog_fd, int target_fd,
return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
}
+static inline int skel_obj_get_info_by_fd(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, info);
+ __u8 sha[SHA256_DIGEST_LENGTH];
+ struct bpf_map_info info;
+ __u32 info_len = sizeof(info);
+ union bpf_attr attr;
+
+ memset(&info, 0, sizeof(info));
+ info.hash = (long) &sha;
+ info.hash_size = SHA256_DIGEST_LENGTH;
+
+ memset(&attr, 0, attr_sz);
+ attr.info.bpf_fd = fd;
+ attr.info.info = (long) &info;
+ attr.info.info_len = info_len;
+ return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
+}
+
+static inline int skel_map_freeze(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+
+ return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
+}
#ifdef __KERNEL__
#define set_err
#else
@@ -308,12 +352,13 @@ static inline int skel_link_create(int prog_fd, int target_fd,
static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
{
- const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id);
const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
int map_fd = -1, prog_fd = -1, key = 0, err;
union bpf_attr attr;
- err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+ err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1,
+ opts->excl_prog_hash, opts->excl_prog_hash_sz);
if (map_fd < 0) {
opts->errstr = "failed to create loader map";
set_err;
@@ -327,11 +372,34 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
goto out;
}
+#ifndef __KERNEL__
+ err = skel_map_freeze(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to freeze map";
+ set_err;
+ goto out;
+ }
+ err = skel_obj_get_info_by_fd(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to fetch obj info";
+ set_err;
+ goto out;
+ }
+#endif
+
memset(&attr, 0, prog_load_attr_sz);
attr.prog_type = BPF_PROG_TYPE_SYSCALL;
attr.insns = (long) opts->insns;
attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
attr.license = (long) "Dual BSD/GPL";
+#ifndef __KERNEL__
+ attr.signature = (long) opts->signature;
+ attr.signature_size = opts->signature_sz;
+#else
+ if (opts->signature || opts->signature_sz)
+ pr_warn("signatures are not supported from bpf_preload\n");
+#endif
+ attr.keyring_id = opts->keyring_id;
memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
attr.fd_array = (long) &map_fd;
attr.log_level = opts->ctx->log_level;
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 2a7865c8e3fe..43deb05a5197 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
BPF_USDT_ARG_CONST,
BPF_USDT_ARG_REG,
BPF_USDT_ARG_REG_DEREF,
+ BPF_USDT_ARG_SIB,
};
+/*
+ * This struct layout is designed specifically to be backwards/forward
+ * compatible between libbpf versions for ARG_CONST, ARG_REG, and
+ * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
+ */
struct __bpf_usdt_arg_spec {
/* u64 scalar interpreted depending on arg_type, see below */
__u64 val_off;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* arg location case, see bpf_usdt_arg() for details */
- enum __bpf_usdt_arg_type arg_type;
+ enum __bpf_usdt_arg_type arg_type: 8;
+ /* index register offset within struct pt_regs */
+ __u16 idx_reg_off: 12;
+ /* scale factor for index register (1, 2, 4, or 8) */
+ __u16 scale_bitshift: 4;
+ /* reserved for future use, keeps reg_off offset stable */
+ __u8 __reserved: 8;
+#else
+ __u8 __reserved: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum __bpf_usdt_arg_type arg_type: 8;
+#endif
/* offset of referenced register within struct pt_regs */
short reg_off;
/* whether arg should be interpreted as signed value */
@@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
struct __bpf_usdt_spec *spec;
struct __bpf_usdt_arg_spec *arg_spec;
- unsigned long val;
+ unsigned long val, idx;
int err, spec_id;
*res = 0;
@@ -204,6 +223,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
val >>= arg_spec->arg_bitshift;
#endif
break;
+ case BPF_USDT_ARG_SIB:
+ /* Arg is in memory addressed by SIB (Scale-Index-Base) mode
+ * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
+ * fetch the base register contents and the index register
+ * contents from pt_regs. Then we calculate the final address
+ * as base + (index * scale) + offset, and do a user-space
+ * probe read to fetch the argument value.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
+ if (err)
+ return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ val >>= arg_spec->arg_bitshift;
+#endif
+ break;
default:
return -EINVAL;
}
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 3373b9d45ac4..fc2785eecc17 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -200,12 +200,23 @@ enum usdt_arg_type {
USDT_ARG_CONST,
USDT_ARG_REG,
USDT_ARG_REG_DEREF,
+ USDT_ARG_SIB,
};
/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
struct usdt_arg_spec {
__u64 val_off;
- enum usdt_arg_type arg_type;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ enum usdt_arg_type arg_type: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+#else
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum usdt_arg_type arg_type: 8;
+#endif
short reg_off;
bool arg_signed;
char arg_bitshift;
@@ -570,9 +581,8 @@ static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long o
return NULL;
}
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
- struct usdt_note *usdt_note);
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
+ size_t desc_off, struct usdt_note *usdt_note);
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
@@ -626,7 +636,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
struct elf_seg *seg = NULL;
void *tmp;
- err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
+ err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, &note);
if (err)
goto err_out;
@@ -1132,8 +1142,7 @@ err_out:
/* Parse out USDT ELF note from '.note.stapsdt' section.
* Logic inspired by perf's code.
*/
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
struct usdt_note *note)
{
const char *provider, *name, *args;
@@ -1283,11 +1292,51 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
- char reg_name[16];
- int len, reg_off;
- long off;
+ char reg_name[16] = {0}, idx_reg_name[16] = {0};
+ int len, reg_off, idx_reg_off, scale = 1;
+ long off = 0;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
+ sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, reg_name, idx_reg_name, &len) == 3
+ ) {
+ /*
+ * Scale Index Base case:
+ * 1@-96(%rbp,%rax,8)
+ * 1@(%rbp,%rax,8)
+ * 1@-96(%rbp,%rax)
+ * 1@(%rbp,%rax)
+ */
+ arg->arg_type = USDT_ARG_SIB;
+ arg->val_off = off;
- if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+
+ idx_reg_off = calc_pt_regs_off(idx_reg_name);
+ if (idx_reg_off < 0)
+ return idx_reg_off;
+ arg->idx_reg_off = idx_reg_off;
+
+ /* validate scale factor and set fields directly */
+ switch (scale) {
+ case 1: arg->scale_bitshift = 0; break;
+ case 2: arg->scale_bitshift = 1; break;
+ case 4: arg->scale_bitshift = 2; break;
+ case 8: arg->scale_bitshift = 3; break;
+ default:
+ pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
+ return -EINVAL;
+ }
+ } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -4@-20(%rbp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1306,6 +1355,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
+ /* register read has no memory offset */
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 9ef569492560..ddaeb4eb3e24 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -75,6 +75,9 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
size_t ci, cj, ei;
int cmp;
+ if (!excludes->cnt)
+ return;
+
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index ef032e17fec4..eb295756c3bf 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -830,7 +830,7 @@ class TypeArrayNest(Type):
'ynl_attr_for_each_nested(attr2, attr) {',
'\tif (ynl_attr_validate(yarg, attr2))',
'\t\treturn YNL_PARSE_CB_ERROR;',
- f'\t{var}->_count.{self.c_name}++;',
+ f'\tn_{self.c_name}++;',
'}']
return get_lines, None, local_vars
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index b6fdc68053cc..2e555c4060c5 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -278,6 +278,25 @@ static bool decode_insn_reg2i16_fomat(union loongarch_instruction inst,
return true;
}
+static bool decode_insn_reg3_fomat(union loongarch_instruction inst,
+ struct instruction *insn)
+{
+ switch (inst.reg3_format.opcode) {
+ case amswapw_op:
+ if (inst.reg3_format.rd == LOONGARCH_GPR_ZERO &&
+ inst.reg3_format.rk == LOONGARCH_GPR_RA &&
+ inst.reg3_format.rj == LOONGARCH_GPR_ZERO) {
+ /* amswap.w $zero, $ra, $zero */
+ insn->type = INSN_BUG;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
unsigned long offset, unsigned int maxlen,
struct instruction *insn)
@@ -309,11 +328,19 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
return 0;
if (decode_insn_reg2i16_fomat(inst, insn))
return 0;
+ if (decode_insn_reg3_fomat(inst, insn))
+ return 0;
- if (inst.word == 0)
+ if (inst.word == 0) {
+ /* andi $zero, $zero, 0x0 */
insn->type = INSN_NOP;
- else if (inst.reg0i15_format.opcode == break_op) {
- /* break */
+ } else if (inst.reg0i15_format.opcode == break_op &&
+ inst.reg0i15_format.immediate == 0x0) {
+ /* break 0x0 */
+ insn->type = INSN_TRAP;
+ } else if (inst.reg0i15_format.opcode == break_op &&
+ inst.reg0i15_format.immediate == 0x1) {
+ /* break 0x1 */
insn->type = INSN_BUG;
} else if (inst.reg2_format.opcode == ertn_op) {
/* ertn */
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index e39f86d97002..a80b75f7b061 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -27,6 +27,7 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
struct table_info *next_table;
unsigned long tmp_insn_offset;
unsigned long tmp_rodata_offset;
+ bool is_valid_list = false;
rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
if (!rsec)
@@ -35,6 +36,12 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
INIT_LIST_HEAD(&table_list);
for_each_reloc(rsec, reloc) {
+ if (reloc->sym->sec->rodata)
+ continue;
+
+ if (strcmp(insn->sec->name, reloc->sym->sec->name))
+ continue;
+
orig_table = malloc(sizeof(struct table_info));
if (!orig_table) {
WARN("malloc failed");
@@ -49,6 +56,22 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
if (reloc_idx(reloc) + 1 == sec_num_entries(rsec))
break;
+
+ if (strcmp(insn->sec->name, (reloc + 1)->sym->sec->name)) {
+ list_for_each_entry(orig_table, &table_list, jump_info) {
+ if (orig_table->insn_offset == insn->offset) {
+ is_valid_list = true;
+ break;
+ }
+ }
+
+ if (!is_valid_list) {
+ list_del_init(&table_list);
+ continue;
+ }
+
+ break;
+ }
}
list_for_each_entry(orig_table, &table_list, jump_info) {
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 98c4713c1b09..0ad5cc70ecbe 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -880,3 +880,15 @@ unsigned int arch_reloc_size(struct reloc *reloc)
return 8;
}
}
+
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_64:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 80239843e9f0..0f6b197cfcb0 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -87,6 +87,7 @@ static const struct option check_options[] = {
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
+ OPT_BOOLEAN(0 , "noabs", &opts.noabs, "reject absolute references in allocatable sections"),
OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),
@@ -162,6 +163,7 @@ static bool opts_valid(void)
opts.hack_noinstr ||
opts.ibt ||
opts.mcount ||
+ opts.noabs ||
opts.noinstr ||
opts.orc ||
opts.retpoline ||
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d14f20ef1db1..093fcd01dd6e 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3564,7 +3564,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
/* Ignore KCFI type preambles, which always fall through */
if (!strncmp(func->name, "__cfi_", 6) ||
- !strncmp(func->name, "__pfx_", 6))
+ !strncmp(func->name, "__pfx_", 6) ||
+ !strncmp(func->name, "__pi___cfi_", 11) ||
+ !strncmp(func->name, "__pi___pfx_", 11))
return 0;
if (file->ignore_unreachables)
@@ -4644,6 +4646,47 @@ static void disas_warned_funcs(struct objtool_file *file)
disas_funcs(funcs);
}
+__weak bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+ unsigned int type = reloc_type(reloc);
+ size_t sz = elf_addr_size(elf);
+
+ return (sz == 8) ? (type == R_ABS64) : (type == R_ABS32);
+}
+
+static int check_abs_references(struct objtool_file *file)
+{
+ struct section *sec;
+ struct reloc *reloc;
+ int ret = 0;
+
+ for_each_sec(file, sec) {
+ /* absolute references in non-loadable sections are fine */
+ if (!(sec->sh.sh_flags & SHF_ALLOC))
+ continue;
+
+ /* section must have an associated .rela section */
+ if (!sec->rsec)
+ continue;
+
+ /*
+ * Special case for compiler generated metadata that is not
+ * consumed until after boot.
+ */
+ if (!strcmp(sec->name, "__patchable_function_entries"))
+ continue;
+
+ for_each_reloc(sec->rsec, reloc) {
+ if (arch_absolute_reloc(file->elf, reloc)) {
+ WARN("section %s has absolute relocation at offset 0x%lx",
+ sec->name, reloc_offset(reloc));
+ ret++;
+ }
+ }
+ }
+ return ret;
+}
+
struct insn_chunk {
void *addr;
struct insn_chunk *next;
@@ -4777,6 +4820,9 @@ int check(struct objtool_file *file)
goto out;
}
+ if (opts.noabs)
+ warnings += check_abs_references(file);
+
if (opts.orc && nr_insns) {
ret = orc_create(file);
if (ret)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 01ef6f415adf..be33c7b43180 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -97,6 +97,7 @@ bool arch_is_embedded_insn(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc);
unsigned int arch_reloc_size(struct reloc *reloc);
unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 6b08666fa69d..ab22673862e1 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -26,6 +26,7 @@ struct opts {
bool uaccess;
int prefix;
bool cfi;
+ bool noabs;
/* options: */
bool backtrace;
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 6a922d046b8e..802895fae3ca 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -45,7 +45,6 @@ NORETURN(rewind_stack_and_make_dead)
NORETURN(rust_begin_unwind)
NORETURN(rust_helper_BUG)
NORETURN(sev_es_terminate)
-NORETURN(snp_abort)
NORETURN(start_kernel)
NORETURN(stop_this_cpu)
NORETURN(usercopy_abort)
diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
index 27c1d5ebcd91..b07e699aaa3c 100644
--- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
@@ -482,3 +482,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 1e8c44c7b614..7a7049c2c307 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -382,3 +382,5 @@
465 n64 listxattrat sys_listxattrat
466 n64 removexattrat sys_removexattrat
467 n64 open_tree_attr sys_open_tree_attr
+468 n64 file_getattr sys_file_getattr
+469 n64 file_setattr sys_file_setattr
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 9a084bdb8926..b453e80dfc00 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -558,3 +558,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index a4569b96ef06..8a6744d658db 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -470,3 +470,5 @@
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr sys_file_setattr
diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
index 52a7652fcff6..5e9c9eff5539 100644
--- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
@@ -471,3 +471,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
index 83e45eb6c095..ebb7d06d1044 100644
--- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
@@ -513,3 +513,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
index ac007ea00979..4877e16da69a 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -473,3 +473,5 @@
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
467 i386 open_tree_attr sys_open_tree_attr
+468 i386 file_getattr sys_file_getattr
+469 i386 file_setattr sys_file_setattr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index cfb5ca41e30d..92cf0fe2291e 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -391,6 +391,8 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/tests/topdown.c b/tools/perf/arch/x86/tests/topdown.c
index 8d0ea7a4bbc1..1eba3b4594ef 100644
--- a/tools/perf/arch/x86/tests/topdown.c
+++ b/tools/perf/arch/x86/tests/topdown.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "arch-tests.h"
#include "../util/topdown.h"
+#include "debug.h"
#include "evlist.h"
#include "parse-events.h"
#include "pmu.h"
diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
index f657a77314f8..374e4cb788d8 100644
--- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
index aad572a78d7f..12387ea88b9a 100644
--- a/tools/perf/bench/inject-buildid.c
+++ b/tools/perf/bench/inject-buildid.c
@@ -85,7 +85,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused,
if (typeflag == FTW_D || typeflag == FTW_SL)
return 0;
- if (filename__read_build_id(fpath, &bid) < 0)
+ if (filename__read_build_id(fpath, &bid, /*block=*/true) < 0)
return 0;
dso->name = realpath(fpath, NULL);
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index c98104481c8a..2e0f2004696a 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -180,7 +180,7 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
struct nscookie nsc;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &bid);
+ err = filename__read_build_id(filename, &bid, /*block=*/true);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
@@ -204,7 +204,7 @@ static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi)
int err;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &bid);
+ err = filename__read_build_id(filename, &bid, /*block=*/true);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
@@ -280,7 +280,7 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
if (!dso__build_id_filename(dso, filename, sizeof(filename), false))
return true;
- if (filename__read_build_id(filename, &bid) == -1) {
+ if (filename__read_build_id(filename, &bid, /*block=*/true) == -1) {
if (errno == ENOENT)
return false;
@@ -309,7 +309,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
int err;
nsinfo__mountns_enter(nsi, &nsc);
- err = filename__read_build_id(filename, &bid);
+ err = filename__read_build_id(filename, &bid, /*block=*/true);
nsinfo__mountns_exit(&nsc);
if (err < 0) {
pr_debug("Couldn't read a build-id in %s\n", filename);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40ba6a94f719..a114b3fa1bea 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -680,12 +680,12 @@ static int dso__read_build_id(struct dso *dso)
mutex_lock(dso__lock(dso));
nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
- if (filename__read_build_id(dso__long_name(dso), &bid) > 0)
+ if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0)
dso__set_build_id(dso, &bid);
else if (dso__nsinfo(dso)) {
char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));
- if (new_name && filename__read_build_id(new_name, &bid) > 0)
+ if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0)
dso__set_build_id(dso, &bid);
free(new_name);
}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fd49703021fd..078634461df2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -2009,6 +2009,7 @@ static int __cmd_contention(int argc, const char **argv)
.owner = show_lock_owner,
.cgroups = RB_ROOT,
};
+ struct perf_env host_env;
lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
if (!lockhash_table)
@@ -2024,7 +2025,10 @@ static int __cmd_contention(int argc, const char **argv)
eops.mmap = perf_event__process_mmap;
eops.tracing_data = perf_event__process_tracing_data;
- session = perf_session__new(use_bpf ? NULL : &data, &eops);
+ perf_env__init(&host_env);
+ session = __perf_session__new(use_bpf ? NULL : &data, &eops,
+ /*trace_event_repipe=*/false, &host_env);
+
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
err = PTR_ERR(session);
@@ -2142,6 +2146,7 @@ out_delete:
evlist__delete(con.evlist);
lock_contention_finish(&con);
perf_session__delete(session);
+ perf_env__exit(&host_env);
zfree(&lockhash_table);
return err;
}
diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c
index 30c7da79e109..8b31d1d05f90 100644
--- a/tools/perf/tests/pe-file-parsing.c
+++ b/tools/perf/tests/pe-file-parsing.c
@@ -37,7 +37,7 @@ static int run_dir(const char *d)
size_t idx;
scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d);
- ret = filename__read_build_id(filename, &bid);
+ ret = filename__read_build_id(filename, &bid, /*block=*/true);
TEST_ASSERT_VAL("Failed to read build_id",
ret == sizeof(expect_build_id));
TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id,
@@ -49,7 +49,7 @@ static int run_dir(const char *d)
!strcmp(debuglink, expect_debuglink));
scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink);
- ret = filename__read_build_id(debugfile, &bid);
+ ret = filename__read_build_id(debugfile, &bid, /*block=*/true);
TEST_ASSERT_VAL("Failed to read debug file build_id",
ret == sizeof(expect_build_id));
TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id,
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index 93baee2eae42..6132f1af3e22 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -31,7 +31,7 @@ static int build_id_cache__add_file(const char *filename)
struct build_id bid = { .size = 0, };
int err;
- err = filename__read_build_id(filename, &bid);
+ err = filename__read_build_id(filename, &bid, /*block=*/true);
if (err < 0) {
pr_debug("Failed to read build id of %s\n", filename);
return err;
diff --git a/tools/perf/tests/shell/test_bpf_metadata.sh b/tools/perf/tests/shell/test_bpf_metadata.sh
index 69e3c2055134..be67d56e0f09 100755
--- a/tools/perf/tests/shell/test_bpf_metadata.sh
+++ b/tools/perf/tests/shell/test_bpf_metadata.sh
@@ -61,7 +61,7 @@ test_bpf_metadata() {
/perf_version/ {
if (entry) print $NF;
}
- ' | egrep "$VERS" > /dev/null
+ ' | grep -qF "$VERS"
then
echo "Basic BPF metadata test [Failed invalid output]"
err=1
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index a15ac2fa4b20..f291ab4f94eb 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -90,10 +90,28 @@
#define DN_ATTRIB 0x00000020 /* File changed attibutes */
#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+/* Reserved kernel ranges [-100], [-10000, -40000]. */
#define AT_FDCWD -100 /* Special value for dirfd used to
indicate openat should use the
current working directory. */
+/*
+ * The concept of process and threads in userland and the kernel is a confusing
+ * one - within the kernel every thread is a 'task' with its own individual PID,
+ * however from userland's point of view threads are grouped by a single PID,
+ * which is that of the 'thread group leader', typically the first thread
+ * spawned.
+ *
+ * To cut the Gideon knot, for internal kernel usage, we refer to
+ * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel
+ * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread
+ * group leader...
+ */
+#define PIDFD_SELF_THREAD -10000 /* Current thread. */
+#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */
+
+#define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */
+#define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */
/* Generic flags for the *at(2) family of syscalls. */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 0098b0ce8ccb..0bd678a4a10e 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -60,6 +60,17 @@
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
+/*
+ * The root inode of procfs is guaranteed to always have the same inode number.
+ * For programs that make heavy use of procfs, verifying that the root is a
+ * real procfs root and using openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH})
+ * will allow you to make sure you are never tricked into operating on the
+ * wrong procfs file.
+ */
+enum procfs_ino {
+ PROCFS_ROOT_INO = 1,
+};
+
struct file_clone_range {
__s64 src_fd;
__u64 src_offset;
@@ -91,6 +102,63 @@ struct fs_sysfs_path {
__u8 name[128];
};
+/* Protection info capability flags */
+#define LBMD_PI_CAP_INTEGRITY (1 << 0)
+#define LBMD_PI_CAP_REFTAG (1 << 1)
+
+/* Checksum types for Protection Information */
+#define LBMD_PI_CSUM_NONE 0
+#define LBMD_PI_CSUM_IP 1
+#define LBMD_PI_CSUM_CRC16_T10DIF 2
+#define LBMD_PI_CSUM_CRC64_NVME 4
+
+/* sizeof first published struct */
+#define LBMD_SIZE_VER0 16
+
+/*
+ * Logical block metadata capability descriptor
+ * If the device does not support metadata, all the fields will be zero.
+ * Applications must check lbmd_flags to determine whether metadata is
+ * supported or not.
+ */
+struct logical_block_metadata_cap {
+ /* Bitmask of logical block metadata capability flags */
+ __u32 lbmd_flags;
+ /*
+ * The amount of data described by each unit of logical block
+ * metadata
+ */
+ __u16 lbmd_interval;
+ /*
+ * Size in bytes of the logical block metadata associated with each
+ * interval
+ */
+ __u8 lbmd_size;
+ /*
+ * Size in bytes of the opaque block tag associated with each
+ * interval
+ */
+ __u8 lbmd_opaque_size;
+ /*
+ * Offset in bytes of the opaque block tag within the logical block
+ * metadata
+ */
+ __u8 lbmd_opaque_offset;
+ /* Size in bytes of the T10 PI tuple associated with each interval */
+ __u8 lbmd_pi_size;
+ /* Offset in bytes of T10 PI tuple within the logical block metadata */
+ __u8 lbmd_pi_offset;
+ /* T10 PI guard tag type */
+ __u8 lbmd_guard_tag_type;
+ /* Size in bytes of the T10 PI application tag */
+ __u8 lbmd_app_tag_size;
+ /* Size in bytes of the T10 PI reference tag */
+ __u8 lbmd_ref_tag_size;
+ /* Size in bytes of the T10 PI storage tag */
+ __u8 lbmd_storage_tag_size;
+ __u8 pad;
+};
+
/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
#define FILE_DEDUPE_RANGE_SAME 0
#define FILE_DEDUPE_RANGE_DIFFERS 1
@@ -149,6 +217,24 @@ struct fsxattr {
};
/*
+ * Variable size structure for file_[sg]et_attr().
+ *
+ * Note. This is alternative to the structure 'struct file_kattr'/'struct fsxattr'.
+ * As this structure is passed to/from userspace with its size, this can
+ * be versioned based on the size.
+ */
+struct file_attr {
+ __u64 fa_xflags; /* xflags field value (get/set) */
+ __u32 fa_extsize; /* extsize field value (get/set)*/
+ __u32 fa_nextents; /* nextents field value (get) */
+ __u32 fa_projid; /* project identifier (get/set) */
+ __u32 fa_cowextsize; /* CoW extsize field value (get/set) */
+};
+
+#define FILE_ATTR_SIZE_VER0 24
+#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0
+
+/*
* Flags for the fsx_xflags field
*/
#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
@@ -247,6 +333,8 @@ struct fsxattr {
* also /sys/kernel/debug/ for filesystems with debugfs exports
*/
#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
+/* Get logical block metadata capability details */
+#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 3b93fb906e3c..ed3aed264aeb 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -244,6 +244,8 @@ struct prctl_mm_map {
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
+/* MTE tag check store only */
+# define PR_MTE_STORE_ONLY (1UL << 19)
/* RISC-V pointer masking tag length */
# define PR_PMLEN_SHIFT 24
# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)
@@ -255,7 +257,12 @@ struct prctl_mm_map {
/* Dispatch syscalls to a userspace handler */
#define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
-# define PR_SYS_DISPATCH_ON 1
+/* Enable dispatch except for the specified range */
+# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1
+/* Enable dispatch for the specified range */
+# define PR_SYS_DISPATCH_INCLUSIVE_ON 2
+/* Legacy name for backwards compatibility */
+# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON
/* The control values for the user space selector when dispatch is enabled */
# define SYSCALL_DISPATCH_FILTER_ALLOW 0
# define SYSCALL_DISPATCH_FILTER_BLOCK 1
diff --git a/tools/perf/trace/beauty/include/uapi/linux/vhost.h b/tools/perf/trace/beauty/include/uapi/linux/vhost.h
index d4b3e2ae1314..c57674a6aa0d 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/vhost.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/vhost.h
@@ -235,4 +235,39 @@
*/
#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
+
+/* Extended features manipulation */
+#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \
+ struct vhost_features_array)
+#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \
+ struct vhost_features_array)
+
+/* fork_owner values for vhost */
+#define VHOST_FORK_OWNER_KTHREAD 0
+#define VHOST_FORK_OWNER_TASK 1
+
+/**
+ * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device,
+ * This ioctl must called before VHOST_SET_OWNER.
+ * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y
+ *
+ * @param fork_owner: An 8-bit value that determines the vhost thread mode
+ *
+ * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value):
+ * - Vhost will create vhost worker as tasks forked from the owner,
+ * inheriting all of the owner's attributes.
+ *
+ * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD:
+ * - Vhost will create vhost workers as kernel threads.
+ */
+#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x84, __u8)
+
+/**
+ * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device.
+ * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y
+ *
+ * @return: An 8-bit value indicating the current thread mode.
+ */
+#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x85, __u8)
+
#endif
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 5b6d3e899e11..2298cd396c42 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -657,9 +657,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
info_node->info_linear = info_linear;
info_node->metadata = NULL;
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
- free(info_linear);
+ /*
+ * Insert failed, likely because of a duplicate event
+ * made by the sideband thread. Ignore synthesizing the
+ * metadata.
+ */
free(info_node);
+ goto out;
}
+ /* info_linear is now owned by info_node and shouldn't be freed below. */
info_linear = NULL;
/*
@@ -827,18 +833,18 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
return err;
}
-static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
+static int perf_env__add_bpf_info(struct perf_env *env, u32 id)
{
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
struct btf *btf = NULL;
u64 arrays;
u32 btf_id;
- int fd;
+ int fd, err = 0;
fd = bpf_prog_get_fd_by_id(id);
if (fd < 0)
- return;
+ return -EINVAL;
arrays = 1UL << PERF_BPIL_JITED_KSYMS;
arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
@@ -852,6 +858,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
+ err = PTR_ERR(info_linear);
goto out;
}
@@ -862,38 +869,46 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
info_node->info_linear = info_linear;
info_node->metadata = bpf_metadata_create(&info_linear->info);
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
+ pr_debug("%s: duplicate add bpf info request for id %u\n",
+ __func__, btf_id);
free(info_linear);
free(info_node);
+ goto out;
}
- } else
+ } else {
free(info_linear);
+ err = -ENOMEM;
+ goto out;
+ }
if (btf_id == 0)
goto out;
btf = btf__load_from_kernel_by_id(btf_id);
- if (libbpf_get_error(btf)) {
- pr_debug("%s: failed to get BTF of id %u, aborting\n",
- __func__, btf_id);
- goto out;
+ if (!btf) {
+ err = -errno;
+ pr_debug("%s: failed to get BTF of id %u %d\n", __func__, btf_id, err);
+ } else {
+ perf_env__fetch_btf(env, btf_id, btf);
}
- perf_env__fetch_btf(env, btf_id, btf);
out:
btf__free(btf);
close(fd);
+ return err;
}
static int bpf_event__sb_cb(union perf_event *event, void *data)
{
struct perf_env *env = data;
+ int ret = 0;
if (event->header.type != PERF_RECORD_BPF_EVENT)
return -1;
switch (event->bpf.type) {
case PERF_BPF_EVENT_PROG_LOAD:
- perf_env__add_bpf_info(env, event->bpf.id);
+ ret = perf_env__add_bpf_info(env, event->bpf.id);
case PERF_BPF_EVENT_PROG_UNLOAD:
/*
@@ -907,7 +922,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data)
break;
}
- return 0;
+ return ret;
}
int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c
index 80b1d2b3729b..5a66dc8594aa 100644
--- a/tools/perf/util/bpf-utils.c
+++ b/tools/perf/util/bpf-utils.c
@@ -20,7 +20,7 @@ struct bpil_array_desc {
*/
};
-static struct bpil_array_desc bpil_array_desc[] = {
+static const struct bpil_array_desc bpil_array_desc[] = {
[PERF_BPIL_JITED_INSNS] = {
offsetof(struct bpf_prog_info, jited_prog_insns),
offsetof(struct bpf_prog_info, jited_prog_len),
@@ -115,7 +115,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
__u32 info_len = sizeof(info);
__u32 data_len = 0;
int i, err;
- void *ptr;
+ __u8 *ptr;
if (arrays >> PERF_BPIL_LAST_ARRAY)
return ERR_PTR(-EINVAL);
@@ -126,15 +126,15 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
pr_debug("can't get prog info: %s", strerror(errno));
return ERR_PTR(-EFAULT);
}
+ if (info.type >= __MAX_BPF_PROG_TYPE)
+ pr_debug("%s:%d: unexpected program type %u\n", __func__, __LINE__, info.type);
/* step 2: calculate total size of all arrays */
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
bool include_array = (arrays & (1UL << i)) > 0;
- struct bpil_array_desc *desc;
__u32 count, size;
- desc = bpil_array_desc + i;
-
/* kernel is too old to support this field */
if (info_len < desc->array_offset + sizeof(__u32) ||
info_len < desc->count_offset + sizeof(__u32) ||
@@ -163,19 +163,20 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
ptr = info_linear->data;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
__u32 count, size;
if ((arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
bpf_prog_info_set_offset_u32(&info_linear->info,
desc->count_offset, count);
bpf_prog_info_set_offset_u32(&info_linear->info,
desc->size_offset, size);
+ assert(ptr >= info_linear->data);
+ assert(ptr < &info_linear->data[data_len]);
bpf_prog_info_set_offset_u64(&info_linear->info,
desc->array_offset,
ptr_to_u64(ptr));
@@ -189,27 +190,45 @@ get_bpf_prog_info_linear(int fd, __u64 arrays)
free(info_linear);
return ERR_PTR(-EFAULT);
}
+ if (info_linear->info.type >= __MAX_BPF_PROG_TYPE) {
+ pr_debug("%s:%d: unexpected program type %u\n",
+ __func__, __LINE__, info_linear->info.type);
+ }
/* step 6: verify the data */
+ ptr = info_linear->data;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
- __u32 v1, v2;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
+ __u32 count1, count2, size1, size2;
+ __u64 ptr2;
if ((arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
- v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ count1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ count2 = bpf_prog_info_read_offset_u32(&info_linear->info,
desc->count_offset);
- if (v1 != v2)
- pr_warning("%s: mismatch in element count\n", __func__);
+ if (count1 != count2) {
+ pr_warning("%s: mismatch in element count %u vs %u\n", __func__, count1, count2);
+ free(info_linear);
+ return ERR_PTR(-ERANGE);
+ }
- v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
- v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ size1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+ size2 = bpf_prog_info_read_offset_u32(&info_linear->info,
desc->size_offset);
- if (v1 != v2)
- pr_warning("%s: mismatch in rec size\n", __func__);
+ if (size1 != size2) {
+ pr_warning("%s: mismatch in rec size %u vs %u\n", __func__, size1, size2);
+ free(info_linear);
+ return ERR_PTR(-ERANGE);
+ }
+ ptr2 = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset);
+ if (ptr_to_u64(ptr) != ptr2) {
+ pr_warning("%s: mismatch in array %p vs %llx\n", __func__, ptr, ptr2);
+ free(info_linear);
+ return ERR_PTR(-ERANGE);
+ }
+ ptr += roundup(count1 * size1, sizeof(__u64));
}
/* step 7: update info_len and data_len */
@@ -224,13 +243,12 @@ void bpil_addr_to_offs(struct perf_bpil *info_linear)
int i;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
__u64 addr, offs;
if ((info_linear->arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
addr = bpf_prog_info_read_offset_u64(&info_linear->info,
desc->array_offset);
offs = addr - ptr_to_u64(info_linear->data);
@@ -244,13 +262,12 @@ void bpil_offs_to_addr(struct perf_bpil *info_linear)
int i;
for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) {
- struct bpil_array_desc *desc;
+ const struct bpil_array_desc *desc = &bpil_array_desc[i];
__u64 addr, offs;
if ((info_linear->arrays & (1UL << i)) == 0)
continue;
- desc = bpil_array_desc + i;
offs = bpf_prog_info_read_offset_u64(&info_linear->info,
desc->array_offset);
addr = offs + ptr_to_u64(info_linear->data);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index a7018a3b0437..bf7f3268b9a2 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -115,7 +115,7 @@ int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sb
struct build_id bid = { .size = 0, };
int ret;
- ret = filename__read_build_id(pathname, &bid);
+ ret = filename__read_build_id(pathname, &bid, /*block=*/true);
if (ret < 0)
return ret;
@@ -841,7 +841,7 @@ static int filename__read_build_id_ns(const char *filename,
int ret;
nsinfo__mountns_enter(nsi, &nsc);
- ret = filename__read_build_id(filename, bid);
+ ret = filename__read_build_id(filename, bid, /*block=*/true);
nsinfo__mountns_exit(&nsc);
return ret;
diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c
index a44c70f93156..bb9ebd84ec2d 100644
--- a/tools/perf/util/debuginfo.c
+++ b/tools/perf/util/debuginfo.c
@@ -110,8 +110,12 @@ struct debuginfo *debuginfo__new(const char *path)
if (!dso)
goto out;
- /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */
- if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0)
+ /*
+ * Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO. Don't block
+ * incase the path isn't for a regular file.
+ */
+ assert(!dso__has_build_id(dso));
+ if (filename__read_build_id(path, &bid, /*block=*/false) > 0)
dso__set_build_id(dso, &bid);
for (type = distro_dwarf_types;
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index 0a7645c7fae7..64c1d65b0149 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -81,13 +81,13 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data)
return 0;
}
nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
- if (filename__read_build_id(dso__long_name(dso), &bid) > 0) {
+ if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) {
dso__set_build_id(dso, &bid);
args->have_build_id = true;
} else if (errno == ENOENT && dso__nsinfo(dso)) {
char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));
- if (new_name && filename__read_build_id(new_name, &bid) > 0) {
+ if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) {
dso__set_build_id(dso, &bid);
args->have_build_id = true;
}
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 89979ca23c3f..34e2fdfe7300 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -120,7 +120,7 @@
#endif
// In the kernel sources (include/linux/cfi_types.h), this has a different
-// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang
+// definition when CONFIG_CFI is used, for tools/ just use the !cfi
// definition:
#ifndef SYM_TYPED_START
#define SYM_TYPED_START(name, linkage, align...) \
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 8fabddc1c0da..72c7a4e15d61 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -32,7 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn,
intel_pt_insn->rel = 0;
intel_pt_insn->emulated_ptwrite = false;
- if (insn_is_avx(insn)) {
+ if (insn_is_avx_or_xop(insn)) {
intel_pt_insn->op = INTEL_PT_OP_OTHER;
intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
intel_pt_insn->length = insn->length;
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 85b2a93a59ac..779f6230130a 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -477,6 +477,7 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
/* Insert the value at the end. */
maps_by_address[nr_maps] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name)
maps_by_name[nr_maps] = map__get(new);
@@ -502,8 +503,6 @@ static int __maps__insert(struct maps *maps, struct map *new)
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
- map__set_kmap_maps(new, maps);
-
return 0;
}
@@ -891,6 +890,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
if (before) {
map__put(maps_by_address[i]);
maps_by_address[i] = before;
+ map__set_kmap_maps(before, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -918,6 +918,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
@@ -942,14 +943,13 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
*/
map__put(maps_by_address[i]);
maps_by_address[i] = map__get(new);
+ map__set_kmap_maps(new, maps);
if (maps_by_name) {
map__put(maps_by_name[ni]);
maps_by_name[ni] = map__get(new);
}
- map__set_kmap_maps(new, maps);
-
check_invariants(maps);
return err;
}
@@ -1019,6 +1019,7 @@ int maps__copy_from(struct maps *dest, struct maps *parent)
err = unwind__prepare_access(dest, new, NULL);
if (!err) {
dest_maps_by_address[i] = new;
+ map__set_kmap_maps(new, dest);
if (dest_maps_by_name)
dest_maps_by_name[i] = map__get(new);
RC_CHK_ACCESS(dest)->nr_maps = i + 1;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 6d2c280a1730..1346fd180653 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -873,13 +873,17 @@ out:
#ifdef HAVE_LIBBFD_BUILDID_SUPPORT
-static int read_build_id(const char *filename, struct build_id *bid)
+static int read_build_id(const char *filename, struct build_id *bid, bool block)
{
size_t size = sizeof(bid->data);
- int err = -1;
+ int err = -1, fd;
bfd *abfd;
- abfd = bfd_openr(filename, NULL);
+ fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK));
+ if (fd < 0)
+ return -1;
+
+ abfd = bfd_fdopenr(filename, /*target=*/NULL, fd);
if (!abfd)
return -1;
@@ -902,7 +906,7 @@ out_close:
#else // HAVE_LIBBFD_BUILDID_SUPPORT
-static int read_build_id(const char *filename, struct build_id *bid)
+static int read_build_id(const char *filename, struct build_id *bid, bool block)
{
size_t size = sizeof(bid->data);
int fd, err = -1;
@@ -911,7 +915,7 @@ static int read_build_id(const char *filename, struct build_id *bid)
if (size < BUILD_ID_SIZE)
goto out;
- fd = open(filename, O_RDONLY);
+ fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK));
if (fd < 0)
goto out;
@@ -934,7 +938,7 @@ out:
#endif // HAVE_LIBBFD_BUILDID_SUPPORT
-int filename__read_build_id(const char *filename, struct build_id *bid)
+int filename__read_build_id(const char *filename, struct build_id *bid, bool block)
{
struct kmod_path m = { .name = NULL, };
char path[PATH_MAX];
@@ -958,9 +962,10 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
}
close(fd);
filename = path;
+ block = true;
}
- err = read_build_id(filename, bid);
+ err = read_build_id(filename, bid, block);
if (m.comp)
unlink(filename);
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 7201494c5c20..41e4ebe5eac5 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -4,7 +4,6 @@
#include <errno.h>
#include <unistd.h>
-#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
@@ -86,13 +85,10 @@ int filename__read_debuglink(const char *filename __maybe_unused,
/*
* Just try PT_NOTE header otherwise fails
*/
-int filename__read_build_id(const char *filename, struct build_id *bid)
+int filename__read_build_id(const char *filename, struct build_id *bid, bool block)
{
- FILE *fp;
- int ret = -1;
+ int fd, ret = -1;
bool need_swap = false, elf32;
- u8 e_ident[EI_NIDENT];
- int i;
union {
struct {
Elf32_Ehdr ehdr32;
@@ -103,28 +99,27 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
Elf64_Phdr *phdr64;
};
} hdrs;
- void *phdr;
- size_t phdr_size;
- void *buf = NULL;
- size_t buf_size = 0;
+ void *phdr, *buf = NULL;
+ ssize_t phdr_size, ehdr_size, buf_size = 0;
- fp = fopen(filename, "r");
- if (fp == NULL)
+ fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK));
+ if (fd < 0)
return -1;
- if (fread(e_ident, sizeof(e_ident), 1, fp) != 1)
+ if (read(fd, hdrs.ehdr32.e_ident, EI_NIDENT) != EI_NIDENT)
goto out;
- if (memcmp(e_ident, ELFMAG, SELFMAG) ||
- e_ident[EI_VERSION] != EV_CURRENT)
+ if (memcmp(hdrs.ehdr32.e_ident, ELFMAG, SELFMAG) ||
+ hdrs.ehdr32.e_ident[EI_VERSION] != EV_CURRENT)
goto out;
- need_swap = check_need_swap(e_ident[EI_DATA]);
- elf32 = e_ident[EI_CLASS] == ELFCLASS32;
+ need_swap = check_need_swap(hdrs.ehdr32.e_ident[EI_DATA]);
+ elf32 = hdrs.ehdr32.e_ident[EI_CLASS] == ELFCLASS32;
+ ehdr_size = (elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64)) - EI_NIDENT;
- if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64,
- elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64),
- 1, fp) != 1)
+ if (read(fd,
+ (elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64) + EI_NIDENT,
+ ehdr_size) != ehdr_size)
goto out;
if (need_swap) {
@@ -138,14 +133,18 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum);
}
}
- phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum
- : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum;
+ if ((elf32 && hdrs.ehdr32.e_phentsize != sizeof(Elf32_Phdr)) ||
+ (!elf32 && hdrs.ehdr64.e_phentsize != sizeof(Elf64_Phdr)))
+ goto out;
+
+ phdr_size = elf32 ? sizeof(Elf32_Phdr) * hdrs.ehdr32.e_phnum
+ : sizeof(Elf64_Phdr) * hdrs.ehdr64.e_phnum;
phdr = malloc(phdr_size);
if (phdr == NULL)
goto out;
- fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET);
- if (fread(phdr, phdr_size, 1, fp) != 1)
+ lseek(fd, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET);
+ if (read(fd, phdr, phdr_size) != phdr_size)
goto out_free;
if (elf32)
@@ -153,8 +152,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
else
hdrs.phdr64 = phdr;
- for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) {
- size_t p_filesz;
+ for (int i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) {
+ ssize_t p_filesz;
if (need_swap) {
if (elf32) {
@@ -180,8 +179,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
goto out_free;
buf = tmp;
}
- fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET);
- if (fread(buf, p_filesz, 1, fp) != 1)
+ lseek(fd, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET);
+ if (read(fd, buf, p_filesz) != p_filesz)
goto out_free;
ret = read_build_id(buf, p_filesz, bid, need_swap);
@@ -194,7 +193,7 @@ out_free:
free(buf);
free(phdr);
out:
- fclose(fp);
+ close(fd);
return ret;
}
@@ -324,7 +323,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
if (ret >= 0)
RC_CHK_ACCESS(dso)->is_64_bit = ret;
- if (filename__read_build_id(ss->name, &bid) > 0)
+ if (filename__read_build_id(ss->name, &bid, /*block=*/true) > 0)
dso__set_build_id(dso, &bid);
return 0;
}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e816e4220d33..3fed54de5401 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1869,14 +1869,14 @@ int dso__load(struct dso *dso, struct map *map)
/*
* Read the build id if possible. This is required for
- * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work
+ * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. Don't block in case path
+ * isn't for a regular file.
*/
- if (!dso__has_build_id(dso) &&
- is_regular_file(dso__long_name(dso))) {
+ if (!dso__has_build_id(dso)) {
struct build_id bid = { .size = 0, };
__symbol__join_symfs(name, PATH_MAX, dso__long_name(dso));
- if (filename__read_build_id(name, &bid) > 0)
+ if (filename__read_build_id(name, &bid, /*block=*/false) > 0)
dso__set_build_id(dso, &bid);
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 3fb5d146d9b1..347106218799 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -140,7 +140,7 @@ struct symbol *dso__next_symbol(struct symbol *sym);
enum dso_type dso__type_fd(int fd);
-int filename__read_build_id(const char *filename, struct build_id *id);
+int filename__read_build_id(const char *filename, struct build_id *id, bool block);
int sysfs__read_build_id(const char *filename, struct build_id *bid);
int modules__parse(const char *filename, void *arg,
int (*process_module)(void *arg, const char *name,
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index cb2c1ace304a..fcd1fd13c30e 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -401,7 +401,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
nsi = nsinfo__new(event->pid);
nsinfo__mountns_enter(nsi, &nc);
- rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1;
+ rc = filename__read_build_id(event->filename, &bid, /*block=*/false) > 0 ? 0 : -1;
nsinfo__mountns_exit(&nc);
nsinfo__put(nsi);
diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1
index 500653ef98c7..8ac82b6f9189 100644
--- a/tools/power/cpupower/man/cpupower-set.1
+++ b/tools/power/cpupower/man/cpupower-set.1
@@ -81,10 +81,11 @@ Refer to the AMD P-State kernel documentation for further information.
.RE
.PP
-\-\-turbo\-boost, \-t
+\-\-turbo\-boost, \-\-boost, \-t
.RS 4
-This option is used to enable or disable the turbo boost feature on
-supported Intel and AMD processors.
+This option is used to enable or disable the boost feature on
+supported Intel and AMD processors, and other boost supported systems.
+(The --boost option is an alias for the --turbo-boost option)
This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature.
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index fc750e127404..7d3732f5f2f6 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -128,7 +128,7 @@ static int get_boost_mode_x86(unsigned int cpu)
/* ToDo: Make this more global */
unsigned long pstates[MAX_HW_PSTATES] = {0,};
- ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states);
+ ret = cpufreq_has_x86_boost_support(cpu, &support, &active, &b_states);
if (ret) {
printf(_("Error while evaluating Boost Capabilities"
" on CPU %d -- are you root?\n"), cpu);
@@ -204,6 +204,18 @@ static int get_boost_mode_x86(unsigned int cpu)
return 0;
}
+static int get_boost_mode_generic(unsigned int cpu)
+{
+ bool active;
+
+ if (!cpufreq_has_generic_boost_support(&active)) {
+ printf(_(" boost state support:\n"));
+ printf(_(" Active: %s\n"), active ? _("yes") : _("no"));
+ }
+
+ return 0;
+}
+
/* --boost / -b */
static int get_boost_mode(unsigned int cpu)
@@ -214,6 +226,8 @@ static int get_boost_mode(unsigned int cpu)
cpupower_cpu_info.vendor == X86_VENDOR_HYGON ||
cpupower_cpu_info.vendor == X86_VENDOR_INTEL)
return get_boost_mode_x86(cpu);
+ else
+ get_boost_mode_generic(cpu);
freqs = cpufreq_get_boost_frequencies(cpu);
if (freqs) {
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 0677b58374ab..c2117e5650dd 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -21,6 +21,7 @@ static struct option set_opts[] = {
{"epp", required_argument, NULL, 'e'},
{"amd-pstate-mode", required_argument, NULL, 'm'},
{"turbo-boost", required_argument, NULL, 't'},
+ {"boost", required_argument, NULL, 't'},
{ },
};
@@ -62,8 +63,8 @@ int cmd_set(int argc, char **argv)
params.params = 0;
/* parameter parsing */
- while ((ret = getopt_long(argc, argv, "b:e:m:",
- set_opts, NULL)) != -1) {
+ while ((ret = getopt_long(argc, argv, "b:e:m:t:",
+ set_opts, NULL)) != -1) {
switch (ret) {
case 'b':
if (params.perf_bias)
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 95749b8ee475..82ea62bdf5a2 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -103,6 +103,9 @@ extern struct cpupower_cpu_info cpupower_cpu_info;
/* cpuid and cpuinfo helpers **************************/
+int cpufreq_has_generic_boost_support(bool *active);
+int cpupower_set_turbo_boost(int turbo_boost);
+
/* X86 ONLY ****************************************/
#if defined(__i386__) || defined(__x86_64__)
@@ -118,7 +121,6 @@ extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
extern int cpupower_set_epp(unsigned int cpu, char *epp);
extern int cpupower_set_amd_pstate_mode(char *mode);
-extern int cpupower_set_turbo_boost(int turbo_boost);
/* Read/Write msr ****************************/
@@ -139,8 +141,8 @@ extern int decode_pstates(unsigned int cpu, int boost_states,
/* AMD HW pstate decoding **************************/
-extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
- int *active, int * states);
+int cpufreq_has_x86_boost_support(unsigned int cpu, int *support,
+ int *active, int *states);
/* AMD P-State stuff **************************/
bool cpupower_amd_pstate_enabled(void);
@@ -181,13 +183,11 @@ static inline int cpupower_set_epp(unsigned int cpu, char *epp)
{ return -1; };
static inline int cpupower_set_amd_pstate_mode(char *mode)
{ return -1; };
-static inline int cpupower_set_turbo_boost(int turbo_boost)
-{ return -1; };
/* Read/Write msr ****************************/
-static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
- int *active, int * states)
+static inline int cpufreq_has_x86_boost_support(unsigned int cpu, int *support,
+ int *active, int *states)
{ return -1; }
static inline bool cpupower_amd_pstate_enabled(void)
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 76e461ff4f74..166dc1e470ea 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -8,15 +8,14 @@
#include "helpers/helpers.h"
#include "helpers/sysfs.h"
#include "cpufreq.h"
+#include "cpupower_intern.h"
#if defined(__i386__) || defined(__x86_64__)
-#include "cpupower_intern.h"
-
#define MSR_AMD_HWCR 0xc0010015
-int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
- int *states)
+int cpufreq_has_x86_boost_support(unsigned int cpu, int *support, int *active,
+ int *states)
{
int ret;
unsigned long long val;
@@ -124,24 +123,6 @@ int cpupower_set_amd_pstate_mode(char *mode)
return 0;
}
-int cpupower_set_turbo_boost(int turbo_boost)
-{
- char path[SYSFS_PATH_MAX];
- char linebuf[2] = {};
-
- snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost");
-
- if (!is_valid_path(path))
- return -1;
-
- snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost);
-
- if (cpupower_write_sysfs(path, linebuf, 2) <= 0)
- return -1;
-
- return 0;
-}
-
bool cpupower_amd_pstate_enabled(void)
{
char *driver = cpufreq_get_driver(0);
@@ -160,6 +141,39 @@ bool cpupower_amd_pstate_enabled(void)
#endif /* #if defined(__i386__) || defined(__x86_64__) */
+int cpufreq_has_generic_boost_support(bool *active)
+{
+ char path[SYSFS_PATH_MAX];
+ char linebuf[2] = {};
+ unsigned long val;
+ char *endp;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost");
+
+ if (!is_valid_path(path))
+ return -EACCES;
+
+ if (cpupower_read_sysfs(path, linebuf, 2) <= 0)
+ return -EINVAL;
+
+ val = strtoul(linebuf, &endp, 0);
+ if (endp == linebuf || errno == ERANGE)
+ return -EINVAL;
+
+ switch (val) {
+ case 0:
+ *active = false;
+ break;
+ case 1:
+ *active = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* get_cpustate
*
* Gather the information of all online CPUs into bitmask struct
@@ -259,3 +273,21 @@ void print_speed(unsigned long speed, int no_rounding)
}
}
}
+
+int cpupower_set_turbo_boost(int turbo_boost)
+{
+ char path[SYSFS_PATH_MAX];
+ char linebuf[2] = {};
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpufreq/boost");
+
+ if (!is_valid_path(path))
+ return -1;
+
+ snprintf(linebuf, sizeof(linebuf), "%d", turbo_boost);
+
+ if (cpupower_write_sysfs(path, linebuf, 2) <= 0)
+ return -1;
+
+ return 0;
+}
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 72a280e7a9d5..47eb2d4d13a5 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1195,7 +1195,7 @@ static const struct platform_data turbostat_pdata[] = {
{ INTEL_EMERALDRAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_X, &spr_features },
{ INTEL_GRANITERAPIDS_D, &spr_features },
- { INTEL_PANTHERCOVE_X, &dmr_features },
+ { INTEL_DIAMONDRAPIDS_X, &dmr_features },
{ INTEL_LAKEFIELD, &cnl_features },
{ INTEL_ALDERLAKE, &adl_features },
{ INTEL_ALDERLAKE_L, &adl_features },
diff --git a/tools/sched_ext/include/scx/bpf_arena_common.bpf.h b/tools/sched_ext/include/scx/bpf_arena_common.bpf.h
new file mode 100644
index 000000000000..4366fb3c91ce
--- /dev/null
+++ b/tools/sched_ext/include/scx/bpf_arena_common.bpf.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define __arena_global __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+
+/* emit instruction:
+ * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
+ *
+ * This is a workaround for LLVM compiler versions without
+ * __BPF_FEATURE_ADDR_SPACE_CAST that do not automatically cast between arena
+ * pointers and native kernel/userspace ones. In this case we explicitly do so
+ * with cast_kern() and cast_user(). E.g., in the Linux kernel tree,
+ * tools/testing/selftests/bpf includes tests that use these macros to implement
+ * linked lists and hashtables backed by arena memory. In sched_ext, we use
+ * cast_kern() and cast_user() for compatibility with older LLVM toolchains.
+ */
+#ifndef bpf_addr_space_cast
+#define bpf_addr_space_cast(var, dst_as, src_as)\
+ asm volatile(".byte 0xBF; \
+ .ifc %[reg], r0; \
+ .byte 0x00; \
+ .endif; \
+ .ifc %[reg], r1; \
+ .byte 0x11; \
+ .endif; \
+ .ifc %[reg], r2; \
+ .byte 0x22; \
+ .endif; \
+ .ifc %[reg], r3; \
+ .byte 0x33; \
+ .endif; \
+ .ifc %[reg], r4; \
+ .byte 0x44; \
+ .endif; \
+ .ifc %[reg], r5; \
+ .byte 0x55; \
+ .endif; \
+ .ifc %[reg], r6; \
+ .byte 0x66; \
+ .endif; \
+ .ifc %[reg], r7; \
+ .byte 0x77; \
+ .endif; \
+ .ifc %[reg], r8; \
+ .byte 0x88; \
+ .endif; \
+ .ifc %[reg], r9; \
+ .byte 0x99; \
+ .endif; \
+ .short %[off]; \
+ .long %[as]" \
+ : [reg]"+r"(var) \
+ : [off]"i"(BPF_ADDR_SPACE_CAST) \
+ , [as]"i"((dst_as << 16) | src_as));
+#endif
+
+#define __arena
+#define __arena_global SEC(".addr_space.1")
+#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
+#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+ int node_id, __u64 flags) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+/*
+ * Note that cond_break can only be portably used in the body of a breakable
+ * construct, whereas can_loop can be used anywhere.
+ */
+#ifdef TEST
+#define can_loop true
+#define __cond_break(expr) expr
+#else
+#ifdef __BPF_FEATURE_MAY_GOTO
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#else
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define __cond_break(expr) \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: expr; \
+ l_continue:; \
+ })
+#endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
+#endif /* __BPF_FEATURE_MAY_GOTO */
+#endif /* TEST */
+
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
+
+
+void bpf_preempt_disable(void) __weak __ksym;
+void bpf_preempt_enable(void) __weak __ksym;
diff --git a/tools/sched_ext/include/scx/bpf_arena_common.h b/tools/sched_ext/include/scx/bpf_arena_common.h
new file mode 100644
index 000000000000..10141db0b59d
--- /dev/null
+++ b/tools/sched_ext/include/scx/bpf_arena_common.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member) \
+ ({ \
+ void __arena *__mptr = (void __arena *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+/* Provide the definition of PAGE_SIZE. */
+#include <sys/user.h>
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+char __attribute__((weak)) arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+ int node_id, __u64 flags)
+{
+ return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index d4e21558e982..06e2551033cb 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -24,14 +24,26 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <asm-generic/errno.h>
-#include "user_exit_info.h"
+#include "user_exit_info.bpf.h"
#include "enum_defs.autogen.h"
+#define PF_IDLE 0x00000002 /* I am an IDLE thread */
+#define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
+#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */
+#define PF_KSWAPD 0x00020000 /* I am kswapd */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_EXITING 0x00000004
#define CLOCK_MONOTONIC 1
+#ifndef NR_CPUS
+#define NR_CPUS 1024
+#endif
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
extern int LINUX_KERNEL_VERSION __kconfig;
extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
@@ -91,6 +103,8 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
+struct rq *scx_bpf_locked_rq(void) __ksym;
+struct task_struct *scx_bpf_cpu_curr(s32 cpu) __ksym __weak;
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
u64 scx_bpf_now(void) __ksym __weak;
void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
@@ -107,6 +121,9 @@ void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __
static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
+#define SCX_STRINGIFY(x) #x
+#define SCX_TOSTRING(x) SCX_STRINGIFY(x)
+
/*
* Helper macro for initializing the fmt and variadic argument inputs to both
* bstr exit kfuncs. Callers to this function should use ___fmt and ___param to
@@ -141,13 +158,15 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
* scx_bpf_error() wraps the scx_bpf_error_bstr() kfunc with variadic arguments
* instead of an array of u64. Invoking this macro will cause the scheduler to
* exit in an erroneous state, with diagnostic information being passed to the
- * user.
+ * user. It appends the file and line number to aid debugging.
*/
#define scx_bpf_error(fmt, args...) \
({ \
- scx_bpf_bstr_preamble(fmt, args) \
+ scx_bpf_bstr_preamble( \
+ __FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args) \
scx_bpf_error_bstr(___fmt, ___param, sizeof(___param)); \
- ___scx_bpf_bstr_format_checker(fmt, ##args); \
+ ___scx_bpf_bstr_format_checker( \
+ __FILE__ ":" SCX_TOSTRING(__LINE__) ": " fmt, ##args); \
})
/*
@@ -229,6 +248,7 @@ BPF_PROG(name, ##args)
* be a pointer to the area. Use `MEMBER_VPTR(*ptr, .member)` instead of
* `MEMBER_VPTR(ptr, ->member)`.
*/
+#ifndef MEMBER_VPTR
#define MEMBER_VPTR(base, member) (typeof((base) member) *) \
({ \
u64 __base = (u64)&(base); \
@@ -245,6 +265,7 @@ BPF_PROG(name, ##args)
[max]"i"(sizeof(base) - sizeof((base) member))); \
__addr; \
})
+#endif /* MEMBER_VPTR */
/**
* ARRAY_ELEM_PTR - Obtain the verified pointer to an array element
@@ -260,6 +281,7 @@ BPF_PROG(name, ##args)
* size of the array to compute the max, which will result in rejection by
* the verifier.
*/
+#ifndef ARRAY_ELEM_PTR
#define ARRAY_ELEM_PTR(arr, i, n) (typeof(arr[i]) *) \
({ \
u64 __base = (u64)arr; \
@@ -274,7 +296,7 @@ BPF_PROG(name, ##args)
[max]"r"(sizeof(arr[0]) * ((n) - 1))); \
__addr; \
})
-
+#endif /* ARRAY_ELEM_PTR */
/*
* BPF declarations and helpers
@@ -438,8 +460,27 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
*/
static inline bool is_migration_disabled(const struct task_struct *p)
{
- if (bpf_core_field_exists(p->migration_disabled))
- return p->migration_disabled;
+ /*
+ * Testing p->migration_disabled in a BPF code is tricky because the
+ * migration is _always_ disabled while running the BPF code.
+ * The prolog (__bpf_prog_enter) and epilog (__bpf_prog_exit) for BPF
+ * code execution disable and re-enable the migration of the current
+ * task, respectively. So, the _current_ task of the sched_ext ops is
+ * always migration-disabled. Moreover, p->migration_disabled could be
+ * two or greater when a sched_ext ops BPF code (e.g., ops.tick) is
+ * executed in the middle of the other BPF code execution.
+ *
+ * Therefore, we should decide that the _current_ task is
+ * migration-disabled only when its migration_disabled count is greater
+ * than one. In other words, when p->migration_disabled == 1, there is
+ * an ambiguity, so we should check if @p is the current task or not.
+ */
+ if (bpf_core_field_exists(p->migration_disabled)) {
+ if (p->migration_disabled == 1)
+ return bpf_get_current_task_btf() != p;
+ else
+ return p->migration_disabled;
+ }
return false;
}
@@ -476,7 +517,7 @@ static inline s64 time_delta(u64 after, u64 before)
*/
static inline bool time_after(u64 a, u64 b)
{
- return (s64)(b - a) < 0;
+ return (s64)(b - a) < 0;
}
/**
@@ -500,7 +541,7 @@ static inline bool time_before(u64 a, u64 b)
*/
static inline bool time_after_eq(u64 a, u64 b)
{
- return (s64)(a - b) >= 0;
+ return (s64)(a - b) >= 0;
}
/**
@@ -547,9 +588,15 @@ static inline bool time_in_range_open(u64 a, u64 b, u64 c)
*/
/* useful compiler attributes */
+#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+#ifndef __maybe_unused
#define __maybe_unused __attribute__((__unused__))
+#endif
/*
* READ/WRITE_ONCE() are from kernel (include/asm-generic/rwonce.h). They
@@ -633,6 +680,26 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
})
/*
+ * __calc_avg - Calculate exponential weighted moving average (EWMA) with
+ * @old and @new values. @decay represents how large the @old value remains.
+ * With a larger @decay value, the moving average changes slowly, exhibiting
+ * fewer fluctuations.
+ */
+#define __calc_avg(old, new, decay) ({ \
+ typeof(decay) thr = 1 << (decay); \
+ typeof(old) ret; \
+ if (((old) < thr) || ((new) < thr)) { \
+ if (((old) == 1) && ((new) == 0)) \
+ ret = 0; \
+ else \
+ ret = ((old) - ((old) >> 1)) + ((new) >> 1); \
+ } else { \
+ ret = ((old) - ((old) >> (decay))) + ((new) >> (decay)); \
+ } \
+ ret; \
+})
+
+/*
* log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
* @v: The value for which we're computing the base 2 logarithm.
*/
@@ -663,6 +730,25 @@ static inline u32 log2_u64(u64 v)
}
/*
+ * sqrt_u64 - Calculate the square root of value @x using Newton's method.
+ */
+static inline u64 __sqrt_u64(u64 x)
+{
+ if (x == 0 || x == 1)
+ return x;
+
+ u64 r = ((1ULL << 32) > x) ? x : (1ULL << 32);
+
+ for (int i = 0; i < 8; ++i) {
+ u64 q = x / r;
+ if (r <= q)
+ break;
+ r = (r + q) >> 1;
+ }
+ return r;
+}
+
+/*
* Return a value proportionally scaled to the task's weight.
*/
static inline u64 scale_by_task_weight(const struct task_struct *p, u64 value)
diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h
index 1dc76bd84296..b3c6372bcf81 100644
--- a/tools/sched_ext/include/scx/common.h
+++ b/tools/sched_ext/include/scx/common.h
@@ -75,8 +75,9 @@ typedef int64_t s64;
#include "enums.h"
/* not available when building kernel tools/sched_ext */
-#if __has_include(<lib/sdt_task.h>)
-#include <lib/sdt_task.h>
+#if __has_include(<lib/sdt_task_defs.h>)
+#include "bpf_arena_common.h"
+#include <lib/sdt_task_defs.h>
#endif
#endif /* __SCHED_EXT_COMMON_H */
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index 9252e1a00556..dd9144624dc9 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -38,6 +38,7 @@ void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__i
void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *dst, void *src, size_t src__sz) __ksym __weak;
#define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags) \
(bpf_ksym_exists(scx_bpf_dsq_insert) ? \
@@ -82,6 +83,10 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \
false))
+#define __COMPAT_bpf_cpumask_populate(cpumask, src, size__sz) \
+ (bpf_ksym_exists(bpf_cpumask_populate) ? \
+ (bpf_cpumask_populate(cpumask, src, size__sz)) : -EOPNOTSUPP)
+
#define scx_bpf_dispatch(p, dsq_id, slice, enq_flags) \
_Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()")
@@ -226,6 +231,23 @@ static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
scx_bpf_pick_any_cpu(cpus_allowed, flags))
/*
+ * v6.18: Add a helper to retrieve the current task running on a CPU.
+ *
+ * Keep this helper available until v6.20 for compatibility.
+ */
+static inline struct task_struct *__COMPAT_scx_bpf_cpu_curr(int cpu)
+{
+ struct rq *rq;
+
+ if (bpf_ksym_exists(scx_bpf_cpu_curr))
+ return scx_bpf_cpu_curr(cpu);
+
+ rq = scx_bpf_cpu_rq(cpu);
+
+ return rq ? rq->curr : NULL;
+}
+
+/*
* Define sched_ext_ops. This may be expanded to define multiple variants for
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
*/
diff --git a/tools/sched_ext/include/scx/user_exit_info.bpf.h b/tools/sched_ext/include/scx/user_exit_info.bpf.h
new file mode 100644
index 000000000000..e7ac6611a990
--- /dev/null
+++ b/tools/sched_ext/include/scx/user_exit_info.bpf.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct user_exit_info which is shared between BPF and userspace parts
+ * to communicate exit status and other information.
+ *
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
+ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
+ */
+
+#ifndef __USER_EXIT_INFO_BPF_H
+#define __USER_EXIT_INFO_BPF_H
+
+#ifndef LSP
+#include "vmlinux.h"
+#endif
+#include <bpf/bpf_core_read.h>
+
+#include "user_exit_info_common.h"
+
+#define UEI_DEFINE(__name) \
+ char RESIZABLE_ARRAY(data, __name##_dump); \
+ const volatile u32 __name##_dump_len; \
+ struct user_exit_info __name SEC(".data")
+
+#define UEI_RECORD(__uei_name, __ei) ({ \
+ bpf_probe_read_kernel_str(__uei_name.reason, \
+ sizeof(__uei_name.reason), (__ei)->reason); \
+ bpf_probe_read_kernel_str(__uei_name.msg, \
+ sizeof(__uei_name.msg), (__ei)->msg); \
+ bpf_probe_read_kernel_str(__uei_name##_dump, \
+ __uei_name##_dump_len, (__ei)->dump); \
+ if (bpf_core_field_exists((__ei)->exit_code)) \
+ __uei_name.exit_code = (__ei)->exit_code; \
+ /* use __sync to force memory barrier */ \
+ __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \
+ (__ei)->kind); \
+})
+
+#endif /* __USER_EXIT_INFO_BPF_H */
diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h
index 66f856640ee7..399697fa372f 100644
--- a/tools/sched_ext/include/scx/user_exit_info.h
+++ b/tools/sched_ext/include/scx/user_exit_info.h
@@ -10,55 +10,11 @@
#ifndef __USER_EXIT_INFO_H
#define __USER_EXIT_INFO_H
-#ifdef LSP
-#define __bpf__
-#include "../vmlinux.h"
-#endif
-
-enum uei_sizes {
- UEI_REASON_LEN = 128,
- UEI_MSG_LEN = 1024,
- UEI_DUMP_DFL_LEN = 32768,
-};
-
-struct user_exit_info {
- int kind;
- s64 exit_code;
- char reason[UEI_REASON_LEN];
- char msg[UEI_MSG_LEN];
-};
-
-#ifdef __bpf__
-
-#ifndef LSP
-#include "vmlinux.h"
-#endif
-#include <bpf/bpf_core_read.h>
-
-#define UEI_DEFINE(__name) \
- char RESIZABLE_ARRAY(data, __name##_dump); \
- const volatile u32 __name##_dump_len; \
- struct user_exit_info __name SEC(".data")
-
-#define UEI_RECORD(__uei_name, __ei) ({ \
- bpf_probe_read_kernel_str(__uei_name.reason, \
- sizeof(__uei_name.reason), (__ei)->reason); \
- bpf_probe_read_kernel_str(__uei_name.msg, \
- sizeof(__uei_name.msg), (__ei)->msg); \
- bpf_probe_read_kernel_str(__uei_name##_dump, \
- __uei_name##_dump_len, (__ei)->dump); \
- if (bpf_core_field_exists((__ei)->exit_code)) \
- __uei_name.exit_code = (__ei)->exit_code; \
- /* use __sync to force memory barrier */ \
- __sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \
- (__ei)->kind); \
-})
-
-#else /* !__bpf__ */
-
#include <stdio.h>
#include <stdbool.h>
+#include "user_exit_info_common.h"
+
/* no need to call the following explicitly if SCX_OPS_LOAD() is used */
#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({ \
u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len ?: UEI_DUMP_DFL_LEN; \
@@ -114,5 +70,4 @@ enum uei_ecode_mask {
#define UEI_ECODE_RESTART(__ecode) (UEI_ECODE_SYS_ACT((__ecode)) == SCX_ECODE_ACT_RESTART)
-#endif /* __bpf__ */
#endif /* __USER_EXIT_INFO_H */
diff --git a/tools/sched_ext/include/scx/user_exit_info_common.h b/tools/sched_ext/include/scx/user_exit_info_common.h
new file mode 100644
index 000000000000..2d0981aedd89
--- /dev/null
+++ b/tools/sched_ext/include/scx/user_exit_info_common.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct user_exit_info which is shared between BPF and userspace parts
+ * to communicate exit status and other information.
+ *
+ * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
+ * Copyright (c) 2022 David Vernet <dvernet@meta.com>
+ */
+#ifndef __USER_EXIT_INFO_COMMON_H
+#define __USER_EXIT_INFO_COMMON_H
+
+#ifdef LSP
+#include "../vmlinux.h"
+#endif
+
+enum uei_sizes {
+ UEI_REASON_LEN = 128,
+ UEI_MSG_LEN = 1024,
+ UEI_DUMP_DFL_LEN = 32768,
+};
+
+struct user_exit_info {
+ int kind;
+ s64 exit_code;
+ char reason[UEI_REASON_LEN];
+ char msg[UEI_MSG_LEN];
+};
+
+#endif /* __USER_EXIT_INFO_COMMON_H */
diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c
index 50bc1737c167..55df8b798865 100644
--- a/tools/sched_ext/scx_central.bpf.c
+++ b/tools/sched_ext/scx_central.bpf.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * A central FIFO sched_ext scheduler which demonstrates the followings:
+ * A central FIFO sched_ext scheduler which demonstrates the following:
*
* a. Making all scheduling decisions from one CPU:
*
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 6ba6e610eeaa..55931a4cd71c 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -61,6 +61,7 @@ restart:
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+ assert(skel->rodata->nr_cpu_ids > 0);
assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index fdc7170639e6..2c720e3ecad5 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
.cgroup_move = (void *)fcg_cgroup_move,
.init = (void *)fcg_init,
.exit = (void *)fcg_exit,
- .flags = SCX_OPS_ENQ_EXITING,
+ .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
.name = "flatcg");
diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c
index 6dd423eeb4ff..cd85eb401179 100644
--- a/tools/sched_ext/scx_flatcg.c
+++ b/tools/sched_ext/scx_flatcg.c
@@ -6,6 +6,7 @@
*/
#include <stdio.h>
#include <signal.h>
+#include <assert.h>
#include <unistd.h>
#include <libgen.h>
#include <limits.h>
@@ -137,6 +138,7 @@ restart:
skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+ assert(skel->rodata->nr_cpus > 0);
skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 69d877501cb7..3072b593f898 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -39,7 +39,8 @@ const volatile u32 stall_kernel_nth;
const volatile u32 dsp_inf_loop_after;
const volatile u32 dsp_batch;
const volatile bool highpri_boosting;
-const volatile bool print_shared_dsq;
+const volatile bool print_dsqs_and_events;
+const volatile bool print_msgs;
const volatile s32 disallow_tgid;
const volatile bool suppress_dump;
@@ -56,7 +57,8 @@ struct qmap {
queue1 SEC(".maps"),
queue2 SEC(".maps"),
queue3 SEC(".maps"),
- queue4 SEC(".maps");
+ queue4 SEC(".maps"),
+ dump_store SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
@@ -578,11 +580,26 @@ void BPF_STRUCT_OPS(qmap_dump, struct scx_dump_ctx *dctx)
return;
scx_bpf_dump("QMAP FIFO[%d]:", i);
+
+ /*
+ * Dump can be invoked anytime and there is no way to iterate in
+ * a non-destructive way. Pop and store in dump_store and then
+ * restore afterwards. If racing against new enqueues, ordering
+ * can get mixed up.
+ */
bpf_repeat(4096) {
if (bpf_map_pop_elem(fifo, &pid))
break;
+ bpf_map_push_elem(&dump_store, &pid, 0);
scx_bpf_dump(" %d", pid);
}
+
+ bpf_repeat(4096) {
+ if (bpf_map_pop_elem(&dump_store, &pid))
+ break;
+ bpf_map_push_elem(fifo, &pid, 0);
+ }
+
scx_bpf_dump("\n");
}
}
@@ -617,22 +634,25 @@ void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struc
s32 BPF_STRUCT_OPS(qmap_cgroup_init, struct cgroup *cgrp, struct scx_cgroup_init_args *args)
{
- bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu",
- cgrp->kn->id, args->weight, args->bw_period_us,
- args->bw_quota_us, args->bw_burst_us);
+ if (print_msgs)
+ bpf_printk("CGRP INIT %llu weight=%u period=%lu quota=%ld burst=%lu",
+ cgrp->kn->id, args->weight, args->bw_period_us,
+ args->bw_quota_us, args->bw_burst_us);
return 0;
}
void BPF_STRUCT_OPS(qmap_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
{
- bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight);
+ if (print_msgs)
+ bpf_printk("CGRP SET %llu weight=%u", cgrp->kn->id, weight);
}
void BPF_STRUCT_OPS(qmap_cgroup_set_bandwidth, struct cgroup *cgrp,
u64 period_us, u64 quota_us, u64 burst_us)
{
- bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu", cgrp->kn->id,
- period_us, quota_us, burst_us);
+ if (print_msgs)
+ bpf_printk("CGRP SET %llu period=%lu quota=%ld burst=%lu",
+ cgrp->kn->id, period_us, quota_us, burst_us);
}
/*
@@ -676,16 +696,20 @@ static void print_cpus(void)
void BPF_STRUCT_OPS(qmap_cpu_online, s32 cpu)
{
- bpf_printk("CPU %d coming online", cpu);
- /* @cpu is already online at this point */
- print_cpus();
+ if (print_msgs) {
+ bpf_printk("CPU %d coming online", cpu);
+ /* @cpu is already online at this point */
+ print_cpus();
+ }
}
void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
{
- bpf_printk("CPU %d going offline", cpu);
- /* @cpu is still online at this point */
- print_cpus();
+ if (print_msgs) {
+ bpf_printk("CPU %d going offline", cpu);
+ /* @cpu is still online at this point */
+ print_cpus();
+ }
}
struct monitor_timer {
@@ -783,35 +807,36 @@ static void dump_shared_dsq(void)
static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
{
- struct scx_event_stats events;
-
bpf_rcu_read_lock();
dispatch_highpri(true);
bpf_rcu_read_unlock();
monitor_cpuperf();
- if (print_shared_dsq)
+ if (print_dsqs_and_events) {
+ struct scx_event_stats events;
+
dump_shared_dsq();
- __COMPAT_scx_bpf_events(&events, sizeof(events));
-
- bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
- scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
- bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
- scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
- bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
- scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
- bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
- scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
- bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL",
- scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL));
- bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
- scx_read_event(&events, SCX_EV_BYPASS_DURATION));
- bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
- scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
- bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
- scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+ __COMPAT_scx_bpf_events(&events, sizeof(events));
+
+ bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
+ scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
+ bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
+ scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
+ bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
+ scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
+ bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
+ scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
+ bpf_printk("%35s: %lld", "SCX_EV_REFILL_SLICE_DFL",
+ scx_read_event(&events, SCX_EV_REFILL_SLICE_DFL));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
+ scx_read_event(&events, SCX_EV_BYPASS_DURATION));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
+ scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
+ scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+ }
bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
return 0;
@@ -823,7 +848,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
struct bpf_timer *timer;
s32 ret;
- print_cpus();
+ if (print_msgs)
+ print_cpus();
ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
if (ret)
diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
index c4912ab2e76f..ef701d45ba43 100644
--- a/tools/sched_ext/scx_qmap.c
+++ b/tools/sched_ext/scx_qmap.c
@@ -20,7 +20,7 @@ const char help_fmt[] =
"See the top-level comment in .bpf.c for more details.\n"
"\n"
"Usage: %s [-s SLICE_US] [-e COUNT] [-t COUNT] [-T COUNT] [-l COUNT] [-b COUNT]\n"
-" [-P] [-d PID] [-D LEN] [-p] [-v]\n"
+" [-P] [-M] [-d PID] [-D LEN] [-p] [-v]\n"
"\n"
" -s SLICE_US Override slice duration\n"
" -e COUNT Trigger scx_bpf_error() after COUNT enqueues\n"
@@ -28,7 +28,8 @@ const char help_fmt[] =
" -T COUNT Stall every COUNT'th kernel thread\n"
" -l COUNT Trigger dispatch infinite looping after COUNT dispatches\n"
" -b COUNT Dispatch upto COUNT tasks together\n"
-" -P Print out DSQ content to trace_pipe every second, use with -b\n"
+" -P Print out DSQ content and event counters to trace_pipe every second\n"
+" -M Print out debug messages to trace_pipe\n"
" -H Boost nice -20 tasks in SHARED_DSQ, use with -b\n"
" -d PID Disallow a process from switching into SCHED_EXT (-1 for self)\n"
" -D LEN Set scx_exit_info.dump buffer length\n"
@@ -66,7 +67,7 @@ int main(int argc, char **argv)
skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
- while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
+ while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PMHd:D:Spvh")) != -1) {
switch (opt) {
case 's':
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
@@ -87,7 +88,10 @@ int main(int argc, char **argv)
skel->rodata->dsp_batch = strtoul(optarg, NULL, 0);
break;
case 'P':
- skel->rodata->print_shared_dsq = true;
+ skel->rodata->print_dsqs_and_events = true;
+ break;
+ case 'M':
+ skel->rodata->print_msgs = true;
break;
case 'H':
skel->rodata->highpri_boosting = true;
diff --git a/tools/sched_ext/scx_simple.c b/tools/sched_ext/scx_simple.c
index 76d83199545c..06d4b13bf76b 100644
--- a/tools/sched_ext/scx_simple.c
+++ b/tools/sched_ext/scx_simple.c
@@ -7,6 +7,7 @@
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
+#include <assert.h>
#include <libgen.h>
#include <bpf/bpf.h>
#include <scx/common.h>
@@ -41,6 +42,7 @@ static void sigint_handler(int simple)
static void read_stats(struct scx_simple *skel, __u64 *stats)
{
int nr_cpus = libbpf_num_possible_cpus();
+ assert(nr_cpus > 0);
__u64 cnts[2][nr_cpus];
__u32 idx;
diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl
index 580b4e246aec..d1ae5e92c615 100644
--- a/tools/scripts/syscall.tbl
+++ b/tools/scripts/syscall.tbl
@@ -408,3 +408,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 002ec38a8bbb..3b96d090c5eb 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -17,6 +17,8 @@
#include <asm/sigcontext.h>
#include <asm/unistd.h>
+#include <linux/auxvec.h>
+
#include "../../kselftest.h"
#define TESTS_PER_HWCAP 3
@@ -55,7 +57,6 @@ static void cmpbr_sigill(void)
/* Not implemented, too complicated and unreliable anyway */
}
-
static void crc32_sigill(void)
{
/* CRC32W W0, W0, W1 */
@@ -169,6 +170,18 @@ static void lse128_sigill(void)
: "cc", "memory");
}
+static void lsfe_sigill(void)
+{
+ float __attribute__ ((aligned (16))) mem;
+ register float *memp asm ("x0") = &mem;
+
+ /* STFADD H0, [X0] */
+ asm volatile(".inst 0x7c20801f"
+ : "+r" (memp)
+ :
+ : "memory");
+}
+
static void lut_sigill(void)
{
/* LUTI2 V0.16B, { V0.16B }, V[0] */
@@ -763,6 +776,13 @@ static const struct hwcap_data {
.sigill_fn = lse128_sigill,
},
{
+ .name = "LSFE",
+ .at_hwcap = AT_HWCAP3,
+ .hwcap_bit = HWCAP3_LSFE,
+ .cpuinfo = "lsfe",
+ .sigill_fn = lsfe_sigill,
+ },
+ {
.name = "LUT",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_LUT,
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index 3b520b7efa49..1703543fb7c7 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -227,10 +227,10 @@ int main(int argc, char **argv)
ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
if (ret >= 0) {
ksft_test_result(default_value(), "default_value\n");
- ksft_test_result(write_read, "write_read\n");
- ksft_test_result(write_sleep_read, "write_sleep_read\n");
- ksft_test_result(write_fork_read, "write_fork_read\n");
- ksft_test_result(write_clone_read, "write_clone_read\n");
+ ksft_test_result(write_read(), "write_read\n");
+ ksft_test_result(write_sleep_read(), "write_sleep_read\n");
+ ksft_test_result(write_fork_read(), "write_fork_read\n");
+ ksft_test_result(write_clone_read(), "write_clone_read\n");
} else {
ksft_print_msg("SME support not present\n");
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
index 04e7b72880ef..141cdcbf0b8f 100644
--- a/tools/testing/selftests/arm64/bti/assembler.h
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -14,7 +14,6 @@
#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
-
.macro startfn name:req
.globl \name
\name:
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
index 124bc883365e..a85c19e9524e 100644
--- a/tools/testing/selftests/arm64/fp/fp-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -1187,7 +1187,7 @@ static void sve_write_sve(pid_t child, struct test_config *config)
if (!vl)
return;
- iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_SVE);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
@@ -1234,8 +1234,7 @@ static void sve_write_fpsimd(pid_t child, struct test_config *config)
if (!vl)
return;
- iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq,
- SVE_PT_REGS_FPSIMD);
+ iov.iov_len = SVE_PT_SIZE(vq, SVE_PT_REGS_FPSIMD);
iov.iov_base = malloc(iov.iov_len);
if (!iov.iov_base) {
ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
@@ -1569,7 +1568,6 @@ static void run_sve_tests(void)
&test_config);
}
}
-
}
static void run_sme_tests(void)
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
index 74e23208b94c..9349aa630c84 100644
--- a/tools/testing/selftests/arm64/fp/fp-stress.c
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -105,8 +105,8 @@ static void child_start(struct child_data *child, const char *program)
/*
* Read from the startup pipe, there should be no data
- * and we should block until it is closed. We just
- * carry on on error since this isn't super critical.
+ * and we should block until it is closed. We just
+ * carry-on on error since this isn't super critical.
*/
ret = read(3, &i, sizeof(i));
if (ret < 0)
@@ -549,7 +549,7 @@ int main(int argc, char **argv)
evs = calloc(tests, sizeof(*evs));
if (!evs)
- ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
tests);
for (i = 0; i < cpus; i++) {
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
index e3cec3723ffa..0c40007d1282 100644
--- a/tools/testing/selftests/arm64/fp/kernel-test.c
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -188,13 +188,13 @@ static bool create_socket(void)
ref = malloc(digest_len);
if (!ref) {
- printf("Failed to allocated %d byte reference\n", digest_len);
+ printf("Failed to allocate %d byte reference\n", digest_len);
return false;
}
digest = malloc(digest_len);
if (!digest) {
- printf("Failed to allocated %d byte digest\n", digest_len);
+ printf("Failed to allocate %d byte digest\n", digest_len);
return false;
}
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index b22303778fb0..e0fc3a001e28 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -66,7 +66,7 @@ static const struct vec_type vec_types[] = {
};
#define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
-#define FLAG_TESTS 2
+#define FLAG_TESTS 4
#define FPSIMD_TESTS 2
#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
@@ -95,19 +95,27 @@ static int do_child(void)
static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
{
struct iovec iov;
+ int ret;
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
- return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+ ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
+ return ret;
}
static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
{
struct iovec iov;
+ int ret;
iov.iov_base = fpsimd;
iov.iov_len = sizeof(*fpsimd);
- return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+ ret = ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
}
static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
@@ -115,8 +123,9 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
{
struct user_sve_header *sve;
void *p;
- size_t sz = sizeof *sve;
+ size_t sz = sizeof(*sve);
struct iovec iov;
+ int ret;
while (1) {
if (*size < sz) {
@@ -132,8 +141,11 @@ static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
iov.iov_base = *buf;
iov.iov_len = sz;
- if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov))
+ ret = ptrace(PTRACE_GETREGSET, pid, type->regset, &iov);
+ if (ret) {
+ ksft_perror("ptrace(PTRACE_GETREGSET)");
goto error;
+ }
sve = *buf;
if (sve->size <= sz)
@@ -152,10 +164,46 @@ static int set_sve(pid_t pid, const struct vec_type *type,
const struct user_sve_header *sve)
{
struct iovec iov;
+ int ret;
iov.iov_base = (void *)sve;
iov.iov_len = sve->size;
- return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+ ret = ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+ if (ret == -1)
+ ksft_perror("ptrace(PTRACE_SETREGSET)");
+ return ret;
+}
+
+/* A read operation fails */
+static void read_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ void *ret;
+
+ ret = get_sve(child, type, (void **)&new_sve, &new_sve_size);
+
+ ksft_test_result(ret == NULL, "%s unsupported read fails\n",
+ type->name);
+
+ free(new_sve);
+}
+
+/* A write operation fails */
+static void write_fails(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ /* Just the header, no data */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.vl = SVE_VL_MIN;
+ ret = set_sve(child, type, &sve);
+
+ ksft_test_result(ret != 0, "%s unsupported write fails\n",
+ type->name);
}
/* Validate setting and getting the inherit flag */
@@ -270,6 +318,25 @@ static void check_u32(unsigned int vl, const char *reg,
}
}
+/* Set out of range VLs */
+static void ptrace_set_vl_ranges(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ int ret;
+
+ memset(&sve, 0, sizeof(sve));
+ sve.flags = SVE_PT_REGS_SVE;
+ sve.size = sizeof(sve);
+
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL 0\n", type->name);
+
+ sve.vl = SVE_VL_MAX + SVE_VQ_BYTES;
+ ret = set_sve(child, type, &sve);
+ ksft_test_result(ret != 0, "%s Set invalid VL %d\n", type->name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+}
+
/* Access the FPSIMD registers via the SVE regset */
static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
{
@@ -683,6 +750,20 @@ static int do_parent(pid_t child)
}
for (i = 0; i < ARRAY_SIZE(vec_types); i++) {
+ /*
+ * If the vector type isn't supported reads and writes
+ * should fail.
+ */
+ if (!(getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap)) {
+ read_fails(child, &vec_types[i]);
+ write_fails(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s unsupported read fails\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s unsupported write fails\n",
+ vec_types[i].name);
+ }
+
/* FPSIMD via SVE regset */
if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
ptrace_sve_fpsimd(child, &vec_types[i]);
@@ -703,6 +784,17 @@ static int do_parent(pid_t child)
vec_types[i].name);
}
+ /* Setting out of bounds VLs should fail */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_vl_ranges(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s Set invalid VL 0\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s Set invalid VL %d\n",
+ vec_types[i].name,
+ SVE_VL_MAX + SVE_VQ_BYTES);
+ }
+
/* Step through every possible VQ */
for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) {
vl = sve_vl_from_vq(vq);
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index ea9c7d47790f..2d75d342eeb9 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -690,7 +690,6 @@ static inline void smstop(void)
asm volatile("msr S0_3_C4_C6_3, xzr");
}
-
/*
* Verify we can change the SVE vector length while SME is active and
* continue to use SME afterwards.
diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c
index 584b8d59b7ea..a7f34040fbf1 100644
--- a/tools/testing/selftests/arm64/fp/zt-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c
@@ -108,7 +108,6 @@ static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES])
return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
}
-
static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES])
{
struct iovec iov;
diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile
index d2f3497a9103..1fbbf0ca1f02 100644
--- a/tools/testing/selftests/arm64/gcs/Makefile
+++ b/tools/testing/selftests/arm64/gcs/Makefile
@@ -14,11 +14,11 @@ LDLIBS+=-lpthread
include ../../lib.mk
$(OUTPUT)/basic-gcs: basic-gcs.c
- $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
- -static -include ../../../../include/nolibc/nolibc.h \
+ $(CC) $(CFLAGS) -fno-asynchronous-unwind-tables -fno-ident -s -nostdlib -nostdinc \
+ -static -I../../../../include/nolibc -include ../../../../include/nolibc/nolibc.h \
-I../../../../../usr/include \
-std=gnu99 -I../.. -g \
- -ffreestanding -Wall $^ -o $@ -lgcc
+ -ffreestanding $^ -o $@ -lgcc
$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S
$(CC) -nostdlib $^ -o $@
diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c
index 54f9c888249d..250977abc398 100644
--- a/tools/testing/selftests/arm64/gcs/basic-gcs.c
+++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c
@@ -10,6 +10,7 @@
#include <sys/mman.h>
#include <asm/mman.h>
+#include <asm/hwcap.h>
#include <linux/sched.h>
#include "kselftest.h"
@@ -386,14 +387,13 @@ int main(void)
ksft_print_header();
- /*
- * We don't have getauxval() with nolibc so treat a failure to
- * read GCS state as a lack of support and skip.
- */
+ if (!(getauxval(AT_HWCAP) & HWCAP_GCS))
+ ksft_exit_skip("SKIP GCS not supported\n");
+
ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS,
&gcs_mode, 0, 0, 0);
if (ret != 0)
- ksft_exit_skip("Failed to read GCS state: %d\n", ret);
+ ksft_exit_fail_msg("Failed to read GCS state: %d\n", ret);
if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) {
gcs_mode = PR_SHADOW_STACK_ENABLE;
@@ -410,7 +410,7 @@ int main(void)
}
/* One last test: disable GCS, we can do this one time */
- my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
+ ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0);
if (ret != 0)
ksft_print_msg("Failed to disable GCS: %d\n", ret);
diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c
index 989f75a491b7..1e6abb136ffd 100644
--- a/tools/testing/selftests/arm64/gcs/gcs-locking.c
+++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c
@@ -165,7 +165,6 @@ TEST_F(valid_modes, lock_enable_disable_others)
ASSERT_EQ(ret, 0);
ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES);
-
ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS,
variant->mode);
ASSERT_EQ(ret, 0);
diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c
index bbc7f4950c13..cf316d78ea97 100644
--- a/tools/testing/selftests/arm64/gcs/gcs-stress.c
+++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c
@@ -433,7 +433,7 @@ int main(int argc, char **argv)
evs = calloc(tests, sizeof(*evs));
if (!evs)
- ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
+ ksft_exit_fail_msg("Failed to allocate %d epoll events\n",
tests);
for (i = 0; i < gcs_threads; i++)
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
index 4435600ca400..e597861b26d6 100644
--- a/tools/testing/selftests/arm64/pauth/exec_target.c
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -13,7 +13,12 @@ int main(void)
unsigned long hwcaps;
size_t val;
- fread(&val, sizeof(size_t), 1, stdin);
+ size_t size = fread(&val, sizeof(size_t), 1, stdin);
+
+ if (size != 1) {
+ fprintf(stderr, "Could not read input from stdin\n");
+ return EXIT_FAILURE;
+ }
/* don't try to execute illegal (unimplemented) instructions) caller
* should have checked this and keep worker simple
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3d8378972d26..be1ee7ba7ce0 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -44,3 +44,4 @@ xdp_redirect_multi
xdp_synproxy
xdp_hw_metadata
xdp_features
+verification_cert.h
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 3ebd77206f98..a17baf8c6fd7 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -2,4 +2,3 @@
# Alphabetical order
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
-verifier_iterating_callbacks
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4863106034df..f00587d4ede6 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -120,7 +120,7 @@ TEST_PROGS_EXTENDED := \
test_bpftool.py
TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
# Compile but not part of 'make run_tests'
@@ -137,7 +137,7 @@ TEST_GEN_PROGS_EXTENDED = \
xdping \
xskxceiver
-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
+TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi
ifneq ($(V),1)
submake_extras := feature_display=0
@@ -398,7 +398,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-# vmlinux.h is first dumped to a temprorary file and then compared to
+# vmlinux.h is first dumped to a temporary file and then compared to
# the previous version. This helps to avoid unnecessary re-builds of
# $(TRUNNER_BPF_OBJS)
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
@@ -496,15 +496,16 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
test_subskeleton.skel.h test_subskeleton_lib.skel.h \
test_usdt.skel.h
-LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
- trace_printk.c trace_vprintk.c map_ptr_kern.c \
+LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
+
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
kfunc_call_test_subprog.c
-SKEL_BLACKLIST += $$(LSKELS)
+SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)
test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
@@ -535,6 +536,7 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
+LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT)
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
TRUNNER_BINARY := $1$(if $2,-)$2
TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
@@ -550,6 +552,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
+TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -604,6 +607,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%
# .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites
@@ -653,6 +665,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_SKELS) \
$(TRUNNER_BPF_LSKELS) \
+ $(TRUNNER_BPF_LSKELS_SIGNED) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
@@ -667,6 +680,7 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
$(TRUNNER_EXTRA_HDRS) \
+ $(VERIFY_SIG_HDR) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
@@ -697,6 +711,18 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
endef
+VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh
+VERIFY_SIG_HDR := verification_cert.h
+VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der
+PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem
+
+$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
+ $(Q)mkdir -p $(BUILD_DIR)
+ $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+
+$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
+ $(Q)xxd -i -n test_progs_verification_cert $< > $@
+
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
@@ -716,6 +742,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
disasm.c \
disasm_helpers.c \
json_writer.c \
+ $(VERIFY_SIG_HDR) \
flow_dissector_load.h \
ip_check_defrag_frags.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
@@ -725,7 +752,7 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/uprobe_multi \
$(TEST_KMOD_TARGETS) \
ima_setup.sh \
- verify_sig_setup.sh \
+ $(VERIFY_SIG_SETUP) \
$(wildcard progs/btf_dump_test_case_*.c) \
$(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -816,6 +843,7 @@ $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
+$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -837,6 +865,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
$(OUTPUT)/bench_sockmap.o \
+ $(OUTPUT)/bench_lpm_trie_map.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index ddd73d06a1eb..bd29bb2e6cb5 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -284,6 +284,7 @@ extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
extern struct argp bench_sockmap_argp;
+extern struct argp bench_lpm_trie_map_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -299,6 +300,7 @@ static const struct argp_child bench_parsers[] = {
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
{ &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
+ { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 },
{},
};
@@ -499,7 +501,7 @@ extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-/* pure counting benchmarks to establish theoretical lmits */
+/* pure counting benchmarks to establish theoretical limits */
extern const struct bench bench_trig_usermode_count;
extern const struct bench bench_trig_syscall_count;
extern const struct bench bench_trig_kernel_count;
@@ -510,6 +512,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_kprobe_multi_all;
+extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
@@ -558,6 +562,13 @@ extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
extern const struct bench bench_sockmap;
+extern const struct bench bench_lpm_trie_noop;
+extern const struct bench bench_lpm_trie_baseline;
+extern const struct bench bench_lpm_trie_lookup;
+extern const struct bench bench_lpm_trie_insert;
+extern const struct bench bench_lpm_trie_update;
+extern const struct bench bench_lpm_trie_delete;
+extern const struct bench bench_lpm_trie_free;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -578,6 +589,8 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_kprobe_multi_all,
+ &bench_trig_kretprobe_multi_all,
&bench_trig_fexit,
&bench_trig_fmodret,
&bench_trig_tp,
@@ -625,6 +638,13 @@ static const struct bench *benchs[] = {
&bench_crypto_encrypt,
&bench_crypto_decrypt,
&bench_sockmap,
+ &bench_lpm_trie_noop,
+ &bench_lpm_trie_baseline,
+ &bench_lpm_trie_lookup,
+ &bench_lpm_trie_insert,
+ &bench_lpm_trie_update,
+ &bench_lpm_trie_delete,
+ &bench_lpm_trie_free,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 005c401b3e22..bea323820ffb 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -46,6 +46,7 @@ struct bench_res {
unsigned long gp_ns;
unsigned long gp_ct;
unsigned int stime;
+ unsigned long duration_ns;
};
struct bench {
diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
new file mode 100644
index 000000000000..246f6cb3387d
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+/*
+ * All of these benchmarks operate on tries with keys in the range
+ * [0, args.nr_entries), i.e. there are no gaps or partially filled
+ * branches of the trie for any key < args.nr_entries.
+ *
+ * This gives an idea of worst-case behaviour.
+ */
+
+#include <argp.h>
+#include <linux/time64.h>
+#include <linux/if_ether.h>
+#include "lpm_trie_bench.skel.h"
+#include "lpm_trie_map.skel.h"
+#include "bench.h"
+#include "testing_helpers.h"
+#include "progs/lpm_trie.h"
+
+static struct ctx {
+ struct lpm_trie_bench *bench;
+} ctx;
+
+static struct {
+ __u32 nr_entries;
+ __u32 prefixlen;
+ bool random;
+} args = {
+ .nr_entries = 0,
+ .prefixlen = 32,
+ .random = false,
+};
+
+enum {
+ ARG_NR_ENTRIES = 9000,
+ ARG_PREFIX_LEN,
+ ARG_RANDOM,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Number of unique entries in the LPM trie" },
+ { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
+ "Number of prefix bits to use in the LPM trie" },
+ { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
+ {},
+};
+
+static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_PREFIX_LEN:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid prefix_len value.");
+ argp_usage(state);
+ }
+ args.prefixlen = ret;
+ break;
+ case ARG_RANDOM:
+ args.random = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+const struct argp bench_lpm_trie_map_argp = {
+ .options = opts,
+ .parser = lpm_parse_arg,
+};
+
+static void validate_common(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer\n");
+ exit(1);
+ }
+
+ if (args.nr_entries == 0) {
+ fprintf(stderr, "Missing --nr_entries parameter\n");
+ exit(1);
+ }
+
+ if ((1UL << args.prefixlen) < args.nr_entries) {
+ fprintf(stderr, "prefix_len value too small for nr_entries\n");
+ exit(1);
+ }
+}
+
+static void lpm_insert_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-insert requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-insert does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_delete_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-delete requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-delete does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_free_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-free requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-free does not support --random\n");
+ exit(1);
+ }
+}
+
+static struct trie_key *keys;
+static __u32 *vals;
+
+static void fill_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch update keys to map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void empty_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch delete keys for map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void attach_prog(void)
+{
+ int i;
+
+ ctx.bench = lpm_trie_bench__open_and_load();
+ if (!ctx.bench) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.bench->bss->nr_entries = args.nr_entries;
+ ctx.bench->bss->prefixlen = args.prefixlen;
+ ctx.bench->bss->random = args.random;
+
+ if (lpm_trie_bench__attach(ctx.bench)) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ exit(1);
+ }
+
+ keys = calloc(args.nr_entries, sizeof(*keys));
+ vals = calloc(args.nr_entries, sizeof(*vals));
+
+ for (i = 0; i < args.nr_entries; i++) {
+ struct trie_key *k = &keys[i];
+ __u32 *v = &vals[i];
+
+ k->prefixlen = args.prefixlen;
+ k->data = i;
+ *v = 1;
+ }
+}
+
+static void attach_prog_and_fill_map(void)
+{
+ int fd;
+
+ attach_prog();
+
+ fd = bpf_map__fd(ctx.bench->maps.trie_map);
+ fill_map(fd);
+}
+
+static void lpm_noop_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_BASELINE;
+}
+
+static void lpm_lookup_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_LOOKUP;
+}
+
+static void lpm_insert_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_INSERT;
+}
+
+static void lpm_update_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_UPDATE;
+}
+
+static void lpm_delete_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_DELETE;
+}
+
+static void lpm_free_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_FREE;
+}
+
+static void lpm_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
+ res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
+}
+
+static void bench_reinit_map(void)
+{
+ int fd = bpf_map__fd(ctx.bench->maps.trie_map);
+
+ switch (ctx.bench->bss->op) {
+ case LPM_OP_INSERT:
+ /* trie_map needs to be emptied */
+ empty_map(fd);
+ break;
+ case LPM_OP_DELETE:
+ /* trie_map needs to be refilled */
+ fill_map(fd);
+ break;
+ default:
+ fprintf(stderr, "Unexpected REINIT return code for op %d\n",
+ ctx.bench->bss->op);
+ exit(1);
+ }
+}
+
+/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
+static void *lpm_producer(void *unused __always_unused)
+{
+ int err;
+ char in[ETH_HLEN]; /* unused */
+
+ LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
+ .data_size_in = sizeof(in), .repeat = 1, );
+
+ while (true) {
+ int fd = bpf_program__fd(ctx.bench->progs.run_bench);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err) {
+ fprintf(stderr, "failed to run BPF prog: %d\n", err);
+ exit(1);
+ }
+
+ /* Check for kernel error code */
+ if ((int)opts.retval < 0) {
+ fprintf(stderr, "BPF prog returned error: %d\n",
+ opts.retval);
+ exit(1);
+ }
+
+ switch (opts.retval) {
+ case LPM_BENCH_SUCCESS:
+ break;
+ case LPM_BENCH_REINIT_MAP:
+ bench_reinit_map();
+ break;
+ default:
+ fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
+ opts.retval, ctx.bench->bss->op);
+ exit(1);
+ }
+ }
+
+ return NULL;
+}
+
+static void *lpm_free_producer(void *unused __always_unused)
+{
+ while (true) {
+ struct lpm_trie_map *skel;
+
+ skel = lpm_trie_map__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ fill_map(bpf_map__fd(skel->maps.trie_free_map));
+ lpm_trie_map__destroy(skel);
+ }
+
+ return NULL;
+}
+
+/*
+ * The standard bench op_report_*() functions assume measurements are
+ * taken over a 1-second interval but operations that modify the map
+ * (INSERT, DELETE, and FREE) cannot run indefinitely without
+ * "resetting" the map to the initial state. Depending on the size of
+ * the map, this likely needs to happen before the 1-second timer fires.
+ *
+ * Calculate the fraction of a second over which the op measurement was
+ * taken (to ignore any time spent doing the reset) and report the
+ * throughput results per second.
+ */
+static void frac_second_report_progress(int iter, struct bench_res *res,
+ long delta_ns, double rate_divisor,
+ char rate)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / rate_divisor /
+ (res->duration_ns / (double)NSEC_PER_SEC);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter,
+ (delta_ns - NSEC_PER_SEC) / 1000.0);
+ printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
+ hits_per_prod, rate);
+}
+
+static void frac_second_report_final(struct bench_res res[], int res_cnt,
+ double lat_divisor, double rate_divisor,
+ char rate, const char *unit)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ double latency = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ double val = res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_mean += val / (0.0 + res_cnt);
+ latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ double val =
+ res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_stddev += (hits_mean - val) * (hits_mean - val) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
+ hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
+ rate);
+ printf("latency %8.3lf %s/op\n",
+ latency / lat_divisor / env.producer_cnt, unit);
+}
+
+static void insert_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void delete_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void free_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000.0;
+ char rate = 'K';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void insert_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void delete_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void free_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1000000.0;
+ double rate_divisor = 1000.0;
+ const char *unit = "ms";
+ char rate = 'K';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+/* noop bench measures harness-overhead */
+const struct bench bench_lpm_trie_noop = {
+ .name = "lpm-trie-noop",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_noop_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+ .name = "lpm-trie-baseline",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_baseline_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of doing a lookup on existing entries in a full trie */
+const struct bench bench_lpm_trie_lookup = {
+ .name = "lpm-trie-lookup",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_lookup_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of inserting new entries into an empty trie */
+const struct bench bench_lpm_trie_insert = {
+ .name = "lpm-trie-insert",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_insert_validate,
+ .setup = lpm_insert_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = insert_ops_report_progress,
+ .report_final = insert_ops_report_final,
+};
+
+/* measure cost of updating existing entries in a full trie */
+const struct bench bench_lpm_trie_update = {
+ .name = "lpm-trie-update",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_update_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of deleting existing entries from a full trie */
+const struct bench bench_lpm_trie_delete = {
+ .name = "lpm-trie-delete",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_delete_validate,
+ .setup = lpm_delete_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = delete_ops_report_progress,
+ .report_final = delete_ops_report_final,
+};
+
+/* measure cost of freeing a full trie */
+const struct bench bench_lpm_trie_free = {
+ .name = "lpm-trie-free",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_free_validate,
+ .setup = lpm_free_setup,
+ .producer_thread = lpm_free_producer,
+ .measure = lpm_measure,
+ .report_progress = free_ops_report_progress,
+ .report_final = free_ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
index 8ebf563a67a2..cfc072aa7fff 100644
--- a/tools/testing/selftests/bpf/benchs/bench_sockmap.c
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -10,6 +10,7 @@
#include <argp.h>
#include "bench.h"
#include "bench_sockmap_prog.skel.h"
+#include "bpf_util.h"
#define FILE_SIZE (128 * 1024)
#define DATA_REPEAT_SIZE 10
@@ -124,8 +125,8 @@ static void bench_sockmap_prog_destroy(void)
{
int i;
- for (i = 0; i < sizeof(ctx.fds); i++) {
- if (ctx.fds[0] > 0)
+ for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) {
+ if (ctx.fds[i] > 0)
close(ctx.fds[i]);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 82327657846e..1e2aff007c2a 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -226,6 +226,65 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ /* Some recursive functions will be skipped in
+ * bpf_get_ksyms -> skip_entry, as they can introduce sufficient
+ * overhead. However, it's difficut to skip all the recursive
+ * functions for a debug kernel.
+ *
+ * So, don't run the kprobe-multi-all and kretprobe-multi-all on
+ * a debug kernel.
+ */
+ if (bpf_get_ksyms(&syms, &cnt, true)) {
+ fprintf(stderr, "failed to get ksyms\n");
+ exit(1);
+ }
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+ opts.retprobe = kretprobe;
+ /* attach empty to all the kernel functions except bpf_get_numa_node_id. */
+ if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
+ exit(1);
+ }
+}
+
+static void trigger_kprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, false);
+ attach_bpf(prog);
+}
+
+static void trigger_kretprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kretprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, true);
+ attach_bpf(prog);
+}
+
static void trigger_fexit_setup(void)
{
setup_ctx();
@@ -512,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
+BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index a690f5a68b6b..f7573708a0c3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -6,8 +6,8 @@ def_tests=( \
usermode-count kernel-count syscall-count \
fentry fexit fmodret \
rawtp tp \
- kprobe kprobe-multi \
- kretprobe kretprobe-multi \
+ kprobe kprobe-multi kprobe-multi-all \
+ kretprobe kretprobe-multi kretprobe-multi-all \
)
tests=("$@")
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index da7e230f2781..d89eda3fd8a3 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -599,4 +599,58 @@ extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
struct bpf_dynptr *value_p) __weak __ksym;
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define HARDIRQ_BITS 4
+#define NMI_BITS 4
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+extern bool CONFIG_PREEMPT_RT __kconfig __weak;
+#ifdef bpf_target_x86
+extern const int __preempt_count __ksym;
+#endif
+
+struct task_struct___preempt_rt {
+ int softirq_disable_cnt;
+} __attribute__((preserve_access_index));
+
+static inline int get_preempt_count(void)
+{
+#if defined(bpf_target_x86)
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+#elif defined(bpf_target_arm64)
+ return bpf_get_current_task_btf()->thread_info.preempt.count;
+#endif
+ return 0;
+}
+
+/* Description
+ * Report whether it is in interrupt context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_interrupt(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
+}
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 9386dfe8b884..794d44d19c88 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym __weak;
+extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
/* Description
* Obtain a read-only pointer to the dynptr's data
* Returns
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 5f6963a320d7..4bc2d25f33e1 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -67,6 +67,9 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
#define sys_gettid() syscall(SYS_gettid)
#endif
+/* and poison usage to ensure it does not creep back in. */
+#pragma GCC poison gettid
+
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 15f626014872..20cede4db3ce 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -412,6 +412,26 @@ void remove_cgroup(const char *relative_path)
log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
}
+/*
+ * remove_cgroup_pid() - Remove a cgroup setup by process identified by PID
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ * @pid: PID to be used to find cgroup_path
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup_pid(const char *relative_path, int pid)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path_pid(cgroup_path, relative_path, pid);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
+}
+
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
* @relative_path: The cgroup path, relative to the workdir, to join
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 182e1ac36c95..3857304be874 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path);
int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
void remove_cgroup(const char *relative_path);
+void remove_cgroup_pid(const char *relative_path, int pid);
unsigned long long get_cgroup_id(const char *relative_path);
int get_cgroup1_hierarchy_id(const char *subsys_name);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 8916ab814a3e..70b28c1e653e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
CONFIG_NET_ACT_GACT=y
+CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index e1495a4bbc99..7efad36ceb26 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -31,10 +31,7 @@ CONFIG_COMPAT=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_INFO_REDUCED=n
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
@@ -46,7 +43,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEVTMPFS=y
CONFIG_DRM=y
-CONFIG_DUMMY=y
CONFIG_EXPERT=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -70,13 +66,11 @@ CONFIG_HZ_100=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
-CONFIG_INET_ESP=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -97,22 +91,18 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NET=y
-CONFIG_NF_TABLES=y
CONFIG_NLMON=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_OVERLAY_FS=y
CONFIG_PACKET_DIAG=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
@@ -149,7 +139,6 @@ CONFIG_TASK_XACCT=y
CONFIG_TCG_TIS=y
CONFIG_TCG_TPM=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS=y
@@ -161,6 +150,5 @@ CONFIG_UPROBES=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
index 9acf389dc4ce..b53afb5e0b71 100644
--- a/tools/testing/selftests/bpf/config.ppc64el
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -54,7 +54,6 @@ CONFIG_NET=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NONPORTABLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64
index bb7043a80e1a..7bee24a79a71 100644
--- a/tools/testing/selftests/bpf/config.riscv64
+++ b/tools/testing/selftests/bpf/config.riscv64
@@ -48,7 +48,6 @@ CONFIG_NET_VRF=y
CONFIG_NONPORTABLE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 26c3bc2ce11d..db61878148e4 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -22,10 +22,7 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_NOTIFIERS=y
@@ -56,11 +53,9 @@ CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
CONFIG_INET=y
-CONFIG_INET_ESP=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -83,18 +78,14 @@ CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
-CONFIG_NF_TABLES=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -119,7 +110,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_TASK_XACCT=y
CONFIG_TASKSTATS=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -131,6 +121,5 @@ CONFIG_UPROBES=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 5e713ef7caa3..42ad817b00ae 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -44,7 +44,6 @@ CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
CONFIG_DCB=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEFAULT_FQ_CODEL=y
@@ -104,12 +103,10 @@ CONFIG_HZ_1000=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_INTEL_POWERCLAMP=y
-CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_IP_ROUTE_MULTIPATH=y
@@ -162,7 +159,6 @@ CONFIG_NUMA=y
CONFIG_NUMA_BALANCING=y
CONFIG_NVMEM=y
CONFIG_OSF_PARTITION=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -220,7 +216,6 @@ CONFIG_VALIDATE_FS_PARSER=y
CONFIG_VETH=y
CONFIG_VIRT_DRIVERS=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_X86_CPUID=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 72b5c174ab3b..cdf7b6641444 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -457,7 +457,7 @@ int append_tid(char *str, size_t sz)
if (end + 8 > sz)
return -1;
- sprintf(&str[end], "%07d", gettid());
+ sprintf(&str[end], "%07ld", sys_gettid());
str[end + 7] = '\0';
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 1d53a8561ee2..24c509ce4e5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -42,11 +42,11 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "2"},
- {1, "R3_w", "4"},
- {2, "R3_w", "8"},
- {3, "R3_w", "16"},
- {4, "R3_w", "32"},
+ {0, "R3", "2"},
+ {1, "R3", "4"},
+ {2, "R3", "8"},
+ {3, "R3", "16"},
+ {4, "R3", "32"},
},
},
{
@@ -70,17 +70,17 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "1"},
- {1, "R3_w", "2"},
- {2, "R3_w", "4"},
- {3, "R3_w", "8"},
- {4, "R3_w", "16"},
- {5, "R3_w", "1"},
- {6, "R4_w", "32"},
- {7, "R4_w", "16"},
- {8, "R4_w", "8"},
- {9, "R4_w", "4"},
- {10, "R4_w", "2"},
+ {0, "R3", "1"},
+ {1, "R3", "2"},
+ {2, "R3", "4"},
+ {3, "R3", "8"},
+ {4, "R3", "16"},
+ {5, "R3", "1"},
+ {6, "R4", "32"},
+ {7, "R4", "16"},
+ {8, "R4", "8"},
+ {9, "R4", "4"},
+ {10, "R4", "2"},
},
},
{
@@ -99,12 +99,12 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "4"},
- {1, "R3_w", "8"},
- {2, "R3_w", "10"},
- {3, "R4_w", "8"},
- {4, "R4_w", "12"},
- {5, "R4_w", "14"},
+ {0, "R3", "4"},
+ {1, "R3", "8"},
+ {2, "R3", "10"},
+ {3, "R4", "8"},
+ {4, "R4", "12"},
+ {5, "R4", "14"},
},
},
{
@@ -121,10 +121,10 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "7"},
- {1, "R3_w", "7"},
- {2, "R3_w", "14"},
- {3, "R3_w", "56"},
+ {0, "R3", "7"},
+ {1, "R3", "7"},
+ {2, "R3", "14"},
+ {3, "R3", "56"},
},
},
@@ -162,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w", "pkt(off=8,r=8)"},
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R3_w", "var_off=(0x0; 0x1fe)"},
- {8, "R3_w", "var_off=(0x0; 0x3fc)"},
- {9, "R3_w", "var_off=(0x0; 0x7f8)"},
- {10, "R3_w", "var_off=(0x0; 0xff0)"},
- {12, "R3_w", "pkt_end()"},
- {17, "R4_w", "var_off=(0x0; 0xff)"},
- {18, "R4_w", "var_off=(0x0; 0x1fe0)"},
- {19, "R4_w", "var_off=(0x0; 0xff0)"},
- {20, "R4_w", "var_off=(0x0; 0x7f8)"},
- {21, "R4_w", "var_off=(0x0; 0x3fc)"},
- {22, "R4_w", "var_off=(0x0; 0x1fe)"},
+ {6, "R0", "pkt(off=8,r=8)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R3", "var_off=(0x0; 0x1fe)"},
+ {8, "R3", "var_off=(0x0; 0x3fc)"},
+ {9, "R3", "var_off=(0x0; 0x7f8)"},
+ {10, "R3", "var_off=(0x0; 0xff0)"},
+ {12, "R3", "pkt_end()"},
+ {17, "R4", "var_off=(0x0; 0xff)"},
+ {18, "R4", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4", "var_off=(0x0; 0xff0)"},
+ {20, "R4", "var_off=(0x0; 0x7f8)"},
+ {21, "R4", "var_off=(0x0; 0x3fc)"},
+ {22, "R4", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -195,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R4_w", "var_off=(0x0; 0xff)"},
- {8, "R4_w", "var_off=(0x0; 0xff)"},
- {9, "R4_w", "var_off=(0x0; 0xff)"},
- {10, "R4_w", "var_off=(0x0; 0x1fe)"},
- {11, "R4_w", "var_off=(0x0; 0xff)"},
- {12, "R4_w", "var_off=(0x0; 0x3fc)"},
- {13, "R4_w", "var_off=(0x0; 0xff)"},
- {14, "R4_w", "var_off=(0x0; 0x7f8)"},
- {15, "R4_w", "var_off=(0x0; 0xff0)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R4", "var_off=(0x0; 0xff)"},
+ {8, "R4", "var_off=(0x0; 0xff)"},
+ {9, "R4", "var_off=(0x0; 0xff)"},
+ {10, "R4", "var_off=(0x0; 0x1fe)"},
+ {11, "R4", "var_off=(0x0; 0xff)"},
+ {12, "R4", "var_off=(0x0; 0x3fc)"},
+ {13, "R4", "var_off=(0x0; 0xff)"},
+ {14, "R4", "var_off=(0x0; 0x7f8)"},
+ {15, "R4", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -235,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w", "pkt(r=0)"},
- {4, "R5_w", "pkt(off=14,r=0)"},
- {5, "R4_w", "pkt(off=14,r=0)"},
+ {2, "R5", "pkt(r=0)"},
+ {4, "R5", "pkt(off=14,r=0)"},
+ {5, "R4", "pkt(off=14,r=0)"},
{9, "R2", "pkt(r=18)"},
{10, "R5", "pkt(off=14,r=18)"},
- {10, "R4_w", "var_off=(0x0; 0xff)"},
- {13, "R4_w", "var_off=(0x0; 0xffff)"},
- {14, "R4_w", "var_off=(0x0; 0xffff)"},
+ {10, "R4", "var_off=(0x0; 0xff)"},
+ {13, "R4", "var_off=(0x0; 0xffff)"},
+ {14, "R4", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -299,12 +299,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w", "pkt(id=1,off=14,"},
+ {11, "R5", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
@@ -320,12 +320,12 @@ static struct bpf_align_test tests[] = {
* instruction to validate R5 state. We also check
* that R4 is what it should be in such case.
*/
- {18, "R4_w", "var_off=(0x0; 0x3fc)"},
- {18, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {18, "R4", "var_off=(0x0; 0x3fc)"},
+ {18, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w", "pkt(id=2,off=14,"},
+ {19, "R5", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
@@ -337,21 +337,21 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w", "pkt(off=14,r=8)"},
+ {26, "R5", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
*/
- {28, "R4_w", "var_off=(0x0; 0x3fc)"},
- {28, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {28, "R4", "var_off=(0x0; 0x3fc)"},
+ {28, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {29, "R5_w", "pkt(id=3,off=18,"},
+ {29, "R5", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {31, "R4_w", "var_off=(0x0; 0x7fc)"},
- {31, "R5_w", "var_off=(0x0; 0x7fc)"},
+ {31, "R4", "var_off=(0x0; 0x7fc)"},
+ {31, "R5", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
@@ -397,12 +397,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {8, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {8, "R6", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {11, "R5", "var_off=(0x2; 0x7fc)"},
{12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -414,11 +414,11 @@ static struct bpf_align_test tests[] = {
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {17, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {17, "R6", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w", "var_off=(0x2; 0xffc)"},
+ {19, "R5", "var_off=(0x2; 0xffc)"},
{20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -459,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w", "pkt_end()"},
+ {3, "R5", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
{9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
- {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -478,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -513,12 +513,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {8, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {8, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {9, "R6", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {10, "R7_w", "var_off=(0x0; 0x3fc)"},
+ {10, "R7", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
{11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
@@ -566,16 +566,16 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {9, "R6_w", "var_off=(0x0; 0x3c)"},
+ {6, "R2", "pkt(r=8)"},
+ {9, "R6", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w", "var_off=(0x2; 0x7c)"},
+ {10, "R6", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {14, "R7_w", "var_off=(0x0; 0x7fc)"},
+ {14, "R7", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {16, "R5", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
index 0223fce4db2b..693fd86fbde6 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -40,8 +40,13 @@ static void *spin_lock_thread(void *arg)
err = bpf_prog_test_run_opts(prog_fd, &topts);
ASSERT_OK(err, "test_run err");
+
+ if (topts.retval == -EOPNOTSUPP)
+ goto end;
+
ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+end:
pthread_exit(arg);
}
@@ -63,6 +68,7 @@ static void test_arena_spin_lock_size(int size)
skel = arena_spin_lock__open_and_load();
if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
return;
+
if (skel->data->test_skip == 2) {
test__skip();
goto end;
@@ -86,6 +92,13 @@ static void test_arena_spin_lock_size(int size)
goto end_barrier;
}
+ if (skel->data->test_skip == 3) {
+ printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n",
+ __func__);
+ test__skip();
+ goto end_barrier;
+ }
+
ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
end_barrier:
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 13e101f370a1..92b5f378bfb8 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel)
void test_atomics(void)
{
struct atomics_lskel *skel;
+ int err;
- skel = atomics_lskel__open_and_load();
- if (!ASSERT_OK_PTR(skel, "atomics skeleton load"))
+ skel = atomics_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "atomics skeleton open"))
return;
+ skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = atomics_lskel__load(skel);
+ if (!ASSERT_OK(err, "atomics skeleton load"))
+ goto cleanup;
+
if (skel->data->skip_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index cabc51c2ca6b..9e77e5da7097 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -3,6 +3,7 @@
#include "test_attach_kprobe_sleepable.skel.h"
#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
+#include "kprobe_write_ctx.skel.h"
/* this is how USDT semaphore is actually defined, except volatile modifier */
volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
@@ -201,6 +202,31 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+#ifdef __x86_64__
+/* attach kprobe/kretprobe long event name testings */
+static void test_attach_kprobe_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_kprobe_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -406,6 +432,8 @@ void test_attach_probe(void)
test_attach_uprobe_long_event_name();
if (test__start_subtest("kprobe-long_name"))
test_attach_kprobe_long_event_name();
+ if (test__start_subtest("kprobe-write-ctx"))
+ test_attach_kprobe_write_ctx();
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 4a0670c056ba..75f4dff7d042 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -450,8 +450,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
attr.size = sizeof(attr);
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
- attr.freq = 1;
- attr.sample_freq = 10000;
+ attr.sample_period = 100000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 82903585c870..10cba526d3e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
/* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
* so it's impossible to determine correct pointer size; but if they
- * do, it should be 8 regardless of host architecture, becaues BPF
+ * do, it should be 8 regardless of host architecture, because BPF
* target is always 64-bit
*/
if (!t->known_ptr_sz) {
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
index e0dd966e4a3e..5ad904e9d15d 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
@@ -44,7 +44,7 @@ static void test_read_cgroup_xattr(void)
if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load"))
goto out;
- skel->bss->target_pid = gettid();
+ skel->bss->target_pid = sys_gettid();
if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index adda85f97058..4b42fbc96efc 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -4,6 +4,8 @@
#define _GNU_SOURCE
#include <cgroup_helpers.h>
#include <test_progs.h>
+#include <sched.h>
+#include <sys/wait.h>
#include "cgrp_kfunc_failure.skel.h"
#include "cgrp_kfunc_success.skel.h"
@@ -87,6 +89,72 @@ static const char * const success_tests[] = {
"test_cgrp_from_id",
};
+static void test_cgrp_from_id_ns(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ int pid, pipe_fd[2];
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = skel->progs.test_cgrp_from_id_ns;
+
+ if (!ASSERT_OK(pipe(pipe_fd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork result")) {
+ close(pipe_fd[0]);
+ close(pipe_fd[1]);
+ goto cleanup;
+ }
+
+ if (pid == 0) {
+ int ret = 0;
+
+ close(pipe_fd[0]);
+
+ if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup"))
+ exit(1);
+
+ if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns"))
+ exit(1);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(ret, "test run ret"))
+ exit(1);
+
+ if (!ASSERT_OK(opts.retval, "test run retval"))
+ exit(1);
+
+ if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe"))
+ exit(1);
+
+ exit(0);
+ } else {
+ int res;
+
+ close(pipe_fd[1]);
+
+ ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res");
+ ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child");
+
+ remove_cgroup_pid("cgrp_from_id_ns", pid);
+
+ ASSERT_OK(res, "result from run");
+ }
+
+ close(pipe_fd[0]);
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+}
+
void test_cgrp_kfunc(void)
{
int i, err;
@@ -102,6 +170,9 @@ void test_cgrp_kfunc(void)
run_success_test(success_tests[i]);
}
+ if (test__start_subtest("test_cgrp_from_id_ns"))
+ test_cgrp_from_id_ns();
+
RUN_TESTS(cgrp_kfunc_failure);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 9b2d9ceda210..b9f86cb91e81 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -32,6 +32,8 @@ static struct {
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG},
{"test_adjust", SETUP_SYSCALL_SLEEP},
{"test_adjust_err", SETUP_SYSCALL_SLEEP},
{"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
index 241b2c8c6e0f..c534b4d5f9da 100644
--- a/tools/testing/selftests/bpf/prog_tests/fd_array.c
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -293,7 +293,7 @@ static int get_btf_id_by_fd(int btf_fd, __u32 *id)
* 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1
* 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2
* 3) Close the btf_fd, now refcnt=1
- * Wait and check that BTF stil exists.
+ * Wait and check that BTF still exists.
*/
static void check_fd_array_cnt__referenced_btfs(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 130f5b82d2e6..5ef1804e44df 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -12,13 +12,24 @@ void test_fentry_fexit(void)
int err, prog_fd, i;
LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test_lskel__open_and_load();
+ fentry_skel = fentry_test_lskel__open();
if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
- fexit_skel = fexit_test_lskel__open_and_load();
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
+ goto close_prog;
+
+ fexit_skel = fexit_test_lskel__open();
if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto close_prog;
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
+ goto close_prog;
+
err = fentry_test_lskel__attach(fentry_skel);
if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index aee1bc77a17f..ec882328eb59 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -43,8 +43,13 @@ static void fentry_test(void)
struct fentry_test_lskel *fentry_skel = NULL;
int err;
- fentry_skel = fentry_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+ fentry_skel = fentry_test_lskel__open();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open"))
+ goto cleanup;
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
goto cleanup;
err = fentry_test_common(fentry_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 1c13007e37dd..94eed753560c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -43,8 +43,13 @@ static void fexit_test(void)
struct fexit_test_lskel *fexit_skel = NULL;
int err;
- fexit_skel = fexit_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ fexit_skel = fexit_test_lskel__open();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open"))
+ goto cleanup;
+
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
goto cleanup;
err = fexit_test_common(fexit_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c
index b7b77a6b2979..0de8facca4c5 100644
--- a/tools/testing/selftests/bpf/prog_tests/free_timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c
@@ -124,6 +124,10 @@ void test_free_timer(void)
int err;
skel = free_timer__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "open_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
index a133354ac9bc..97b00c7efe94 100644
--- a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -16,7 +16,7 @@ void test_kernel_flag(void)
if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
return;
- lsm_skel->bss->monitored_tid = gettid();
+ lsm_skel->bss->monitored_tid = sys_gettid();
ret = test_kernel_flag__attach(lsm_skel);
if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index e19ef509ebf8..6cfaa978bc9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -7,6 +7,7 @@
#include "kprobe_multi_session.skel.h"
#include "kprobe_multi_session_cookie.skel.h"
#include "kprobe_multi_verifier.skel.h"
+#include "kprobe_write_ctx.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -422,220 +423,6 @@ static void test_unique_match(void)
kprobe_multi__destroy(skel);
}
-static size_t symbol_hash(long key, void *ctx __maybe_unused)
-{
- return str_hash((const char *) key);
-}
-
-static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
-{
- return strcmp((const char *) key1, (const char *) key2) == 0;
-}
-
-static bool is_invalid_entry(char *buf, bool kernel)
-{
- if (kernel && strchr(buf, '['))
- return true;
- if (!kernel && !strchr(buf, '['))
- return true;
- return false;
-}
-
-static bool skip_entry(char *name)
-{
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- return true;
- if (!strcmp(name, "default_idle"))
- return true;
- if (!strncmp(name, "rcu_", 4))
- return true;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- return true;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
- return true;
- return false;
-}
-
-/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
-static int compare_name(const char *name1, const char *name2)
-{
- const char *res1, *res2;
- int len1, len2;
-
- res1 = strstr(name1, ".llvm.");
- res2 = strstr(name2, ".llvm.");
- len1 = res1 ? res1 - name1 : strlen(name1);
- len2 = res2 ? res2 - name2 : strlen(name2);
-
- if (len1 == len2)
- return strncmp(name1, name2, len1);
- if (len1 < len2)
- return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
- return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
-}
-
-static int load_kallsyms_compare(const void *p1, const void *p2)
-{
- return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
-}
-
-static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
-{
- return compare_name(p1, p2->name);
-}
-
-static int get_syms(char ***symsp, size_t *cntp, bool kernel)
-{
- size_t cap = 0, cnt = 0;
- char *name = NULL, *ksym_name, **syms = NULL;
- struct hashmap *map;
- struct ksyms *ksyms;
- struct ksym *ks;
- char buf[256];
- FILE *f;
- int err = 0;
-
- ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
- if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
- return -EINVAL;
-
- /*
- * The available_filter_functions contains many duplicates,
- * but other than that all symbols are usable in kprobe multi
- * interface.
- * Filtering out duplicates by using hashmap__add, which won't
- * add existing entry.
- */
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
-
- if (!f)
- return -EINVAL;
-
- map = hashmap__new(symbol_hash, symbol_equal, NULL);
- if (IS_ERR(map)) {
- err = libbpf_get_error(map);
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
- continue;
- if (skip_entry(name))
- continue;
-
- ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
- if (!ks) {
- err = -EINVAL;
- goto error;
- }
-
- ksym_name = ks->name;
- err = hashmap__add(map, ksym_name, 0);
- if (err == -EEXIST) {
- err = 0;
- continue;
- }
- if (err)
- goto error;
-
- err = libbpf_ensure_mem((void **) &syms, &cap,
- sizeof(*syms), cnt + 1);
- if (err)
- goto error;
-
- syms[cnt++] = ksym_name;
- }
-
- *symsp = syms;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- hashmap__free(map);
- if (err)
- free(syms);
- return err;
-}
-
-static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
-{
- unsigned long *addr, *addrs, *tmp_addrs;
- int err = 0, max_cnt, inc_cnt;
- char *name = NULL;
- size_t cnt = 0;
- char buf[256];
- FILE *f;
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
-
- if (!f)
- return -ENOENT;
-
- /* In my local setup, the number of entries is 50k+ so Let us initially
- * allocate space to hold 64k entries. If 64k is not enough, incrementally
- * increase 1k each time.
- */
- max_cnt = 65536;
- inc_cnt = 1024;
- addrs = malloc(max_cnt * sizeof(long));
- if (addrs == NULL) {
- err = -ENOMEM;
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
- continue;
- if (skip_entry(name))
- continue;
-
- if (cnt == max_cnt) {
- max_cnt += inc_cnt;
- tmp_addrs = realloc(addrs, max_cnt);
- if (!tmp_addrs) {
- err = -ENOMEM;
- goto error;
- }
- addrs = tmp_addrs;
- }
-
- addrs[cnt++] = (unsigned long)addr;
- }
-
- *addrsp = addrs;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- if (err)
- free(addrs);
- return err;
-}
-
static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
{
long attach_start_ns, attach_end_ns;
@@ -670,7 +457,7 @@ static void test_kprobe_multi_bench_attach(bool kernel)
char **syms = NULL;
size_t cnt = 0;
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -696,13 +483,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel)
size_t cnt = 0;
int err;
- err = get_addrs(&addrs, &cnt, kernel);
+ err = bpf_get_addrs(&addrs, &cnt, kernel);
if (err == -ENOENT) {
test__skip();
return;
}
- if (!ASSERT_OK(err, "get_addrs"))
+ if (!ASSERT_OK(err, "bpf_get_addrs"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -753,6 +540,30 @@ cleanup:
kprobe_multi_override__destroy(skel);
}
+#ifdef __x86_64__
+static void test_attach_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
void serial_test_kprobe_multi_bench_attach(void)
{
if (test__start_subtest("kernel"))
@@ -792,5 +603,7 @@ void test_kprobe_multi_test(void)
test_session_cookie_skel_api();
if (test__start_subtest("unique_match"))
test_unique_match();
+ if (test__start_subtest("attach_write_ctx"))
+ test_attach_write_ctx();
RUN_TESTS(kprobe_multi_verifier);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c
new file mode 100644
index 000000000000..6bdc6d6de0da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "map_excl.skel.h"
+
+static void test_map_excl_allowed(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__set_exclusive_program"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, true);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, false);
+
+ err = map_excl__load(skel);
+ ASSERT_OK(err, "map_excl__load");
+out:
+ map_excl__destroy(skel);
+}
+
+static void test_map_excl_denied(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__make_exclusive"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, false);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, true);
+
+ err = map_excl__load(skel);
+ ASSERT_EQ(err, -EACCES, "exclusive map access not denied\n");
+out:
+ map_excl__destroy(skel);
+
+}
+
+void test_map_excl(void)
+{
+ if (test__start_subtest("map_excl_allowed"))
+ test_map_excl_allowed();
+ if (test__start_subtest("map_excl_denied"))
+ test_map_excl_denied();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 6d391d95f96e..70fa7ae93173 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -90,7 +90,7 @@ void test_module_attach(void)
test_module_attach__detach(skel);
- /* attach fentry/fexit and make sure it get's module reference */
+ /* attach fentry/fexit and make sure it gets module reference */
link = bpf_program__attach(skel->progs.handle_fentry);
if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
new file mode 100644
index 000000000000..9ae49b587f3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+
+#include "test_pinning_devmap.skel.h"
+
+void test_pinning_devmap_reuse(void)
+{
+ const char *pinpath1 = "/sys/fs/bpf/pinmap1";
+ const char *pinpath2 = "/sys/fs/bpf/pinmap2";
+ struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ /* load the object a first time */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "skel_load1"))
+ goto out;
+
+ /* load the object a second time, re-using the pinned map */
+ skel2 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "skel_load2"))
+ goto out;
+
+ /* we can close the reference safely without
+ * the map's refcount falling to 0
+ */
+ test_pinning_devmap__destroy(skel1);
+ skel1 = NULL;
+
+ /* now, swap the pins */
+ err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "swap pins"))
+ goto out;
+
+ /* load the object again, this time the re-use should fail */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_ERR_PTR(skel1, "skel_load3"))
+ goto out;
+
+out:
+ unlink(pinpath1);
+ unlink(pinpath2);
+ test_pinning_devmap__destroy(skel1);
+ test_pinning_devmap__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
index 14f2796076e0..7607cfc2408c 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -54,3 +54,128 @@ void test_prog_tests_framework(void)
return;
clear_test_state(state);
}
+
+static void dummy_emit(const char *buf, bool force) {}
+
+void test_prog_tests_framework_expected_msgs(void)
+{
+ struct expected_msgs msgs;
+ int i, j, error_cnt;
+ const struct {
+ const char *name;
+ const char *log;
+ const char *expected;
+ struct expect_msg *pats;
+ } cases[] = {
+ {
+ .name = "simple-ok",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "simple-fail",
+ .log = "aaabbbddd",
+ .expected = "MATCHED SUBSTR: 'aaa'\n"
+ "EXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-mid",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ { .substr = "bar", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-tail",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-head",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "foo", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-head",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'aaa'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa", .negative = true },
+ { .substr = "bbb" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-tail",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "bbb" },
+ { .substr = "ccc", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-1",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'bbb'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-2",
+ .log = "aaabbb222ccc",
+ .expected = "UNEXPECTED SUBSTR: '222'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "222", .negative = true },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ }
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cases); i++) {
+ if (test__start_subtest(cases[i].name)) {
+ error_cnt = env.subtest_state->error_cnt;
+ msgs.patterns = cases[i].pats;
+ msgs.cnt = 0;
+ for (j = 0; cases[i].pats[j].substr; j++)
+ msgs.cnt++;
+ validate_msgs(cases[i].log, &msgs, dummy_emit);
+ fflush(stderr);
+ env.subtest_state->error_cnt = error_cnt;
+ if (cases[i].expected)
+ ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output");
+ else
+ ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output");
+ test__end_subtest();
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index e261b0e872db..d93a0c7b1786 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -623,7 +623,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a, x.b);
*newy = range(t, y.a + 1, y.b);
} else if (x.a == x.b && x.b == y.b) {
- /* X is a constant matching rigth side of Y */
+ /* X is a constant matching right side of Y */
*newx = range(t, x.a, x.b);
*newy = range(t, y.a, y.b - 1);
} else if (y.a == y.b && x.a == y.a) {
@@ -631,7 +631,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a + 1, x.b);
*newy = range(t, y.a, y.b);
} else if (y.a == y.b && x.b == y.b) {
- /* Y is a constant matching rigth side of X */
+ /* Y is a constant matching right side of X */
*newx = range(t, x.a, x.b - 1);
*newy = range(t, y.a, y.b);
} else {
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
index 0703e987df89..8c6c2043a432 100644
--- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -99,3 +99,19 @@ end:
res_spin_lock__destroy(skel);
return;
}
+
+void serial_test_res_spin_lock_stress(void)
+{
+ if (libbpf_num_possible_cpus() < 3) {
+ test__skip();
+ return;
+ }
+
+ ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+
+ ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index e3ea5dc2f697..254fbfeab06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
- "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) "
+ "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
- "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
- " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
+ " R0=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_innermapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
" R0=map_value(id=2,ks=4,vs=8)"
- " R1_w=map_value(id=2,ks=4,vs=8)\n"
+ " R1=map_value(id=2,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index b7ba5cd47d96..271b5cc9fc01 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -39,7 +39,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 0832fd787457..b277dddd5af7 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -66,7 +66,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index df59e4ae2951..c23b97414813 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -1,46 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "stacktrace_map.skel.h"
void test_stacktrace_map(void)
{
+ struct stacktrace_map *skel;
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "oncpu";
- int err, prog_fd, stack_trace_len;
- const char *file = "./test_stacktrace_map.bpf.o";
- __u32 key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
+ int err, stack_trace_len;
+ __u32 key, val, stack_id, duration = 0;
+ __u64 stack[PERF_MAX_STACK_DEPTH];
- err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = stacktrace_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- prog = bpf_object__find_program_by_name(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (!ASSERT_OK_PTR(link, "attach_tp"))
- goto close_prog;
-
- /* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK_FAIL(control_map_fd < 0))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK_FAIL(stackid_hmap_fd < 0))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK_FAIL(stackmap_fd < 0))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK_FAIL(stack_amap_fd < 0))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ err = stacktrace_map__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
/* give some time for bpf program run */
sleep(1);
@@ -50,26 +31,32 @@ void test_stacktrace_map(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
-
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+ goto out;
+
+ stack_id = skel->bss->stack_id;
+ err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_OK(err, "lookup and delete target stack_id"))
+ goto out;
+
+ err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id"))
+ goto out;
+out:
+ stacktrace_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index c6ef06f55cdb..e985d51d3d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void)
{
const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
- const char *file = "./test_stacktrace_map.bpf.o";
+ const char *file = "./stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
int err, prog_fd;
struct bpf_program *prog;
@@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
index 1932b1e0685c..dc2ccf6a14d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void)
skel->bss->control = 1;
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c
index d9f0185dca61..c3cce5c292bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/stream.c
+++ b/tools/testing/selftests/bpf/prog_tests/stream.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <sys/mman.h>
-#include <regex.h>
#include "stream.skel.h"
#include "stream_fail.skel.h"
@@ -18,87 +17,6 @@ void test_stream_success(void)
return;
}
-struct {
- int prog_off;
- const char *errstr;
-} stream_error_arr[] = {
- {
- offsetof(struct stream, progs.stream_cond_break),
- "ERROR: Timeout detected for may_goto instruction\n"
- "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
- "Call trace:\n"
- "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
- "|[ \t]+[^\n]+\n)*",
- },
- {
- offsetof(struct stream, progs.stream_deadlock),
- "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n"
- "Attempted lock = (0x[0-9a-fA-F]+)\n"
- "Total held locks = 1\n"
- "Held lock\\[ 0\\] = \\1\n" // Lock address must match
- "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n"
- "Call trace:\n"
- "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
- "|[ \t]+[^\n]+\n)*",
- },
-};
-
-static int match_regex(const char *pattern, const char *string)
-{
- int err, rc;
- regex_t re;
-
- err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE);
- if (err)
- return -1;
- rc = regexec(&re, string, 0, NULL, 0);
- regfree(&re);
- return rc == 0 ? 1 : 0;
-}
-
-void test_stream_errors(void)
-{
- LIBBPF_OPTS(bpf_test_run_opts, opts);
- LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
- struct stream *skel;
- int ret, prog_fd;
- char buf[1024];
-
- skel = stream__open_and_load();
- if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
- return;
-
- for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) {
- struct bpf_program **prog;
-
- prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off);
- prog_fd = bpf_program__fd(*prog);
- ret = bpf_prog_test_run_opts(prog_fd, &opts);
- ASSERT_OK(ret, "ret");
- ASSERT_OK(opts.retval, "retval");
-
-#if !defined(__x86_64__)
- ASSERT_TRUE(1, "Timed may_goto unsupported, skip.");
- if (i == 0) {
- ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts);
- ASSERT_EQ(ret, 0, "stream read");
- continue;
- }
-#endif
-
- ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
- ASSERT_GT(ret, 0, "stream read");
- ASSERT_LE(ret, 1023, "len for buf");
- buf[ret] = '\0';
-
- ret = match_regex(stream_error_arr[i].errstr, buf);
- if (!ASSERT_TRUE(ret == 1, "regex match"))
- fprintf(stderr, "Output from stream:\n%s\n", buf);
- }
-
- stream__destroy(skel);
-}
-
void test_stream_syscall(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
@@ -139,3 +57,52 @@ void test_stream_syscall(void)
stream__destroy(skel);
}
+
+static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret, prog_fd;
+ char fault_addr[64];
+ char buf[1024];
+
+ prog_fd = bpf_program__fd(prog);
+
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ sprintf(fault_addr, "0x%lx", *fault_addr_p);
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ ASSERT_LE(ret, 1023, "len for buf");
+ buf[ret] = '\0';
+
+ if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) {
+ fprintf(stderr, "Output from stream:\n%s\n", buf);
+ fprintf(stderr, "Fault Addr: %s\n", fault_addr);
+ }
+}
+
+void test_stream_arena_fault_address(void)
+{
+ struct stream *skel;
+
+#if !defined(__x86_64__) && !defined(__aarch64__)
+ printf("%s:SKIP: arena fault reporting not supported\n", __func__);
+ test__skip();
+ return;
+#endif
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ if (test__start_subtest("read_fault"))
+ test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr);
+ if (test__start_subtest("write_fault"))
+ test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr);
+
+ stream__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
index 35af8044d059..4d66fad3c8bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -8,6 +8,7 @@
static const char * const test_cases[] = {
"strcmp",
+ "strcasecmp",
"strchr",
"strchrnul",
"strnchr",
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
new file mode 100644
index 000000000000..2de38776a2d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_H
+#define __TASK_LOCAL_DATA_H
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+#include <pthread.h>
+#endif
+
+#include <bpf/bpf.h>
+
+/*
+ * OPTIONS
+ *
+ * Define the option before including the header
+ *
+ * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
+ *
+ * Thread-specific memory for storing TLD is allocated lazily on the first call to
+ * tld_get_data(). The thread that calls it must also call tld_free() on thread exit
+ * to prevent memory leak. Pthread will be included if the option is defined. A pthread
+ * key will be registered with a destructor that calls tld_free().
+ *
+ *
+ * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
+ * (default: 64 bytes)
+ *
+ * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using
+ * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be
+ * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated
+ * for these TLDs. This additional memory is allocated for every thread that calls
+ * tld_get_data() even if no tld_create_key are actually called, so be mindful of
+ * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory
+ * will be allocated for TLDs created with it.
+ *
+ *
+ * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
+ *
+ * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
+ * TLD_MAX_DATA_CNT.
+ *
+ *
+ * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc()
+ *
+ * When allocating the memory for storing TLDs, we need to make sure there is a memory
+ * region of the X bytes within a page. This is due to the limit posed by UPTR: memory
+ * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The
+ * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses
+ * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage
+ * as not all memory allocator can use the exact amount of memory requested to fulfill
+ * aligned_alloc(). For example, some may round the size up to the alignment. Enable the
+ * option to always use aligned_alloc() if the implementation has low memory overhead.
+ */
+
+#define TLD_PAGE_SIZE getpagesize()
+#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#ifndef TLD_DYN_DATA_SIZE
+#define TLD_DYN_DATA_SIZE 64
+#endif
+
+#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ _Atomic __u16 size;
+};
+
+struct tld_meta_u {
+ _Atomic __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[];
+};
+
+struct tld_map_value {
+ void *data;
+ struct tld_meta_u *meta;
+};
+
+struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));
+__thread struct tld_data_u *tld_data_p __attribute__((weak));
+__thread void *tld_data_alloc_p __attribute__((weak));
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+pthread_key_t tld_pthread_key __attribute__((weak));
+
+static void tld_free(void);
+
+static void __tld_thread_exit_handler(void *unused)
+{
+ tld_free();
+}
+#endif
+
+static int __tld_init_meta_p(void)
+{
+ struct tld_meta_u *meta, *uninit = NULL;
+ int err = 0;
+
+ meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE);
+ if (!meta) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memset(meta, 0, TLD_PAGE_SIZE);
+ meta->size = TLD_DYN_DATA_SIZE;
+
+ if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) {
+ free(meta);
+ goto out;
+ }
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
+#endif
+out:
+ return err;
+}
+
+static int __tld_init_data_p(int map_fd)
+{
+ bool use_aligned_alloc = false;
+ struct tld_map_value map_val;
+ struct tld_data_u *data;
+ void *data_alloc = NULL;
+ int err, tid_fd = -1;
+
+ tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
+ if (tid_fd < 0) {
+ err = -errno;
+ goto out;
+ }
+
+#ifdef TLD_DATA_USE_ALIGNED_ALLOC
+ use_aligned_alloc = true;
+#endif
+
+ /*
+ * tld_meta_p->size = TLD_DYN_DATA_SIZE +
+ * total size of TLDs defined via TLD_DEFINE_KEY()
+ */
+ data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ?
+ aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) :
+ malloc(tld_meta_p->size * 2);
+ if (!data_alloc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Always pass a page-aligned address to UPTR since the size of tld_map_value::data
+ * is a page in BTF. If data_alloc spans across two pages, use the page that contains large
+ * enough memory.
+ */
+ if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) {
+ map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc);
+ data = data_alloc;
+ data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) +
+ offsetof(struct tld_data_u, data);
+ } else {
+ map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data->start = offsetof(struct tld_data_u, data);
+ }
+ map_val.meta = TLD_READ_ONCE(tld_meta_p);
+
+ err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
+ if (err) {
+ free(data_alloc);
+ goto out;
+ }
+
+ tld_data_p = data;
+ tld_data_alloc_p = data_alloc;
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_setspecific(tld_pthread_key, (void *)1);
+#endif
+out:
+ if (tid_fd >= 0)
+ close(tid_fd);
+ return err;
+}
+
+static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
+{
+ int err, i, sz, off = 0;
+ __u8 cnt;
+
+ if (!TLD_READ_ONCE(tld_meta_p)) {
+ err = __tld_init_meta_p();
+ if (err)
+ return (tld_key_t){err};
+ }
+
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+retry:
+ cnt = atomic_load(&tld_meta_p->cnt);
+ if (i < cnt) {
+ /* A metadata is not ready until size is updated with a non-zero value */
+ while (!(sz = atomic_load(&tld_meta_p->metadata[i].size)))
+ sched_yield();
+
+ if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN))
+ return (tld_key_t){-EEXIST};
+
+ off += TLD_ROUND_UP(sz, 8);
+ continue;
+ }
+
+ /*
+ * TLD_DEFINE_KEY() is given memory upto a page while at most
+ * TLD_DYN_DATA_SIZE is allocated for tld_create_key()
+ */
+ if (dyn_data) {
+ if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size)
+ return (tld_key_t){-E2BIG};
+ } else {
+ if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
+ return (tld_key_t){-E2BIG};
+ tld_meta_p->size += TLD_ROUND_UP(size, 8);
+ }
+
+ /*
+ * Only one tld_create_key() can increase the current cnt by one and
+ * takes the latest available slot. Other threads will check again if a new
+ * TLD can still be added, and then compete for the new slot after the
+ * succeeding thread update the size.
+ */
+ if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
+ goto retry;
+
+ strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN);
+ atomic_store(&tld_meta_p->metadata[i].size, size);
+ return (tld_key_t){(__s16)off};
+ }
+
+ return (tld_key_t){-ENOSPC};
+}
+
+/**
+ * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD.
+ *
+ * @name: The name of the TLD
+ * @size: The size of the TLD
+ * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN
+ *
+ * The macro can only be used in file scope.
+ *
+ * A global variable key of opaque type, tld_key_t, will be declared and initialized before
+ * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key
+ * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD.
+ * bpf programs can also fetch the same key by name.
+ *
+ * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just
+ * enough memory will be allocated for each thread on the first call to tld_get_data().
+ */
+#define TLD_DEFINE_KEY(key, name, size) \
+tld_key_t key; \
+ \
+__attribute__((constructor)) \
+void __tld_define_key_##key(void) \
+{ \
+ key = __tld_create_key(name, size, false); \
+}
+
+/**
+ * tld_create_key() - Create a TLD and return a key associated with the TLD.
+ *
+ * @name: The name the TLD
+ * @size: The size of the TLD
+ *
+ * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check
+ * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to
+ * locate the TLD. bpf programs can also fetch the same key by name.
+ *
+ * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is
+ * not known statically or a TLD needs to be created conditionally)
+ *
+ * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs
+ * created dynamically with tld_create_key(). Since only a user page is pinned to the
+ * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE -
+ * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page.
+ */
+__attribute__((unused))
+static tld_key_t tld_create_key(const char *name, size_t size)
+{
+ return __tld_create_key(name, size, true);
+}
+
+__attribute__((unused))
+static inline bool tld_key_is_err(tld_key_t key)
+{
+ return key.off < 0;
+}
+
+__attribute__((unused))
+static inline int tld_key_err_or_zero(tld_key_t key)
+{
+ return tld_key_is_err(key) ? key.off : 0;
+}
+
+/**
+ * tld_get_data() - Get a pointer to the TLD associated with the given key of the
+ * calling thread.
+ *
+ * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map
+ * of task local data.
+ * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key().
+ *
+ * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD
+ * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte
+ * aligned.
+ *
+ * Threads that call tld_get_data() must call tld_free() on exit to prevent
+ * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined.
+ */
+__attribute__((unused))
+static void *tld_get_data(int map_fd, tld_key_t key)
+{
+ if (!TLD_READ_ONCE(tld_meta_p))
+ return NULL;
+
+ /* tld_data_p is allocated on the first invocation of tld_get_data() */
+ if (!tld_data_p && __tld_init_data_p(map_fd))
+ return NULL;
+
+ return tld_data_p->data + key.off;
+}
+
+/**
+ * tld_free() - Free task local data memory of the calling thread
+ *
+ * For the calling thread, all pointers to TLDs acquired before will become invalid.
+ *
+ * Users must call tld_free() on thread exit to prevent memory leak. Alternatively,
+ * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
+ * to free the memory automatically.
+ */
+__attribute__((unused))
+static void tld_free(void)
+{
+ if (tld_data_alloc_p) {
+ free(tld_data_alloc_p);
+ tld_data_alloc_p = NULL;
+ tld_data_p = NULL;
+ }
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __TASK_LOCAL_DATA_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
new file mode 100644
index 000000000000..450d17d91a56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work_stress.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdatomic.h>
+
+struct test_data {
+ int prog_fd;
+ atomic_int exit;
+};
+
+void *runner(void *test_data)
+{
+ struct test_data *td = test_data;
+ int err = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ while (!err && !atomic_load(&td->exit))
+ err = bpf_prog_test_run_opts(td->prog_fd, &opts);
+
+ return NULL;
+}
+
+static int get_env_int(const char *str, int def)
+{
+ const char *s = getenv(str);
+ char *end;
+ int retval;
+
+ if (!s || !*s)
+ return def;
+ errno = 0;
+ retval = strtol(s, &end, 10);
+ if (errno || *end || retval < 0)
+ return def;
+ return retval;
+}
+
+static void task_work_run(bool enable_delete)
+{
+ struct task_work_stress *skel;
+ struct bpf_program *scheduler, *deleter;
+ int nthreads = 16;
+ int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1);
+ pthread_t tid[nthreads], tid_del;
+ bool started[nthreads], started_del = false;
+ struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 };
+ int i, err;
+
+ skel = task_work_stress__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work");
+ bpf_program__set_autoload(scheduler, true);
+
+ deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work");
+ bpf_program__set_autoload(deleter, true);
+
+ err = task_work_stress__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ for (i = 0; i < nthreads; ++i)
+ started[i] = false;
+
+ td_sched.prog_fd = bpf_program__fd(scheduler);
+ for (i = 0; i < nthreads; ++i) {
+ if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started[i] = true;
+ }
+
+ if (enable_delete)
+ atomic_store(&td_del.exit, 0);
+
+ td_del.prog_fd = bpf_program__fd(deleter);
+ if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started_del = true;
+
+ /* Run stress test for some time */
+ sleep(test_time_s);
+
+cancel:
+ atomic_store(&td_sched.exit, 1);
+ atomic_store(&td_del.exit, 1);
+ for (i = 0; i < nthreads; ++i) {
+ if (started[i])
+ pthread_join(tid[i], NULL);
+ }
+
+ if (started_del)
+ pthread_join(tid_del, NULL);
+
+ ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled");
+ /* Some scheduling attempts should have failed due to contention */
+ ASSERT_GT(skel->bss->schedule_error, 0, "schedule error");
+
+ if (enable_delete) {
+ /* If delete thread is enabled, it has cancelled some callbacks */
+ ASSERT_GT(skel->bss->delete_success, 0, "delete success");
+ ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ } else {
+ /* Without delete thread number of scheduled callbacks is the same as fired */
+ ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ }
+
+cleanup:
+ task_work_stress__destroy(skel);
+}
+
+void test_task_work_stress(void)
+{
+ if (test__start_subtest("no_delete"))
+ task_work_run(false);
+ if (test__start_subtest("with_delete"))
+ task_work_run(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
new file mode 100644
index 000000000000..fd8762ba4b67
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_id_ops_mapping1.skel.h"
+#include "struct_ops_id_ops_mapping2.skel.h"
+
+static void test_st_ops_id_ops_mapping(void)
+{
+ struct struct_ops_id_ops_mapping1 *skel1 = NULL;
+ struct struct_ops_id_ops_mapping2 *skel2 = NULL;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int err, pid, prog1_fd, prog2_fd;
+
+ skel1 = struct_ops_id_ops_mapping1__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open"))
+ goto out;
+
+ skel2 = struct_ops_id_ops_mapping2__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open"))
+ goto out;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel1->bss->st_ops_id = info.id;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel2->bss->st_ops_id = info.id;
+
+ err = struct_ops_id_ops_mapping1__attach(skel1);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach"))
+ goto out;
+
+ err = struct_ops_id_ops_mapping2__attach(skel2);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach"))
+ goto out;
+
+ /* run tracing prog that calls .test_1 and checks return */
+ pid = getpid();
+ skel1->bss->test_pid = pid;
+ skel2->bss->test_pid = pid;
+ sys_gettid();
+ skel1->bss->test_pid = 0;
+ skel2->bss->test_pid = 0;
+
+ /* run syscall_prog that calls .test_1 and checks return */
+ prog1_fd = bpf_program__fd(skel1->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog1_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ prog2_fd = bpf_program__fd(skel2->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog2_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err");
+ ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err");
+
+out:
+ struct_ops_id_ops_mapping1__destroy(skel1);
+ struct_ops_id_ops_mapping2__destroy(skel2);
+}
+
+void test_struct_ops_id_ops_mapping(void)
+{
+ if (test__start_subtest("st_ops_id_ops_mapping"))
+ test_st_ops_id_ops_mapping();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
new file mode 100644
index 000000000000..9fd6306b455c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#define TLD_FREE_DATA_ON_THREAD_EXIT
+#define TLD_DYN_DATA_SIZE 4096
+#include "task_local_data.h"
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+#include "test_task_local_data.skel.h"
+
+TLD_DEFINE_KEY(value0_key, "value0", sizeof(int));
+
+/*
+ * Reset task local data between subtests by clearing metadata other
+ * than the statically defined value0. This is safe as subtests run
+ * sequentially. Users of task local data library should not touch
+ * library internal.
+ */
+static void reset_tld(void)
+{
+ if (TLD_READ_ONCE(tld_meta_p)) {
+ /* Remove TLDs created by tld_create_key() */
+ tld_meta_p->cnt = 1;
+ tld_meta_p->size = TLD_DYN_DATA_SIZE;
+ memset(&tld_meta_p->metadata[1], 0,
+ (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata));
+ }
+}
+
+/* Serialize access to bpf program's global variables */
+static pthread_mutex_t global_mutex;
+
+static tld_key_t *tld_keys;
+
+#define TEST_BASIC_THREAD_NUM 32
+
+void *test_task_local_data_basic_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct test_task_local_data *skel = (struct test_task_local_data *)arg;
+ int fd, err, tid, *value0, *value1;
+ struct test_tld_struct *value2;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ value0 = tld_get_data(fd, value0_key);
+ if (!ASSERT_OK_PTR(value0, "tld_get_data"))
+ goto out;
+
+ value1 = tld_get_data(fd, tld_keys[1]);
+ if (!ASSERT_OK_PTR(value1, "tld_get_data"))
+ goto out;
+
+ value2 = tld_get_data(fd, tld_keys[2]);
+ if (!ASSERT_OK_PTR(value2, "tld_get_data"))
+ goto out;
+
+ tid = sys_gettid();
+
+ *value0 = tid + 0;
+ *value1 = tid + 1;
+ value2->a = tid + 2;
+ value2->b = tid + 3;
+ value2->c = tid + 4;
+ value2->d = tid + 5;
+
+ pthread_mutex_lock(&global_mutex);
+ /* Run task_main that read task local data and save to global variables */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+ /* Make sure valueX are indeed local to threads */
+ ASSERT_EQ(*value0, tid + 0, "value0");
+ ASSERT_EQ(*value1, tid + 1, "value1");
+ ASSERT_EQ(value2->a, tid + 2, "value2.a");
+ ASSERT_EQ(value2->b, tid + 3, "value2.b");
+ ASSERT_EQ(value2->c, tid + 4, "value2.c");
+ ASSERT_EQ(value2->d, tid + 5, "value2.d");
+
+ *value0 = tid + 5;
+ *value1 = tid + 4;
+ value2->a = tid + 3;
+ value2->b = tid + 2;
+ value2->c = tid + 1;
+ value2->d = tid + 0;
+
+ /* Run task_main again */
+ pthread_mutex_lock(&global_mutex);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+out:
+ pthread_exit(NULL);
+}
+
+static void test_task_local_data_basic(void)
+{
+ struct test_task_local_data *skel;
+ pthread_t thread[TEST_BASIC_THREAD_NUM];
+ char dummy_key_name[TLD_NAME_LEN];
+ tld_key_t key;
+ int i, err;
+
+ reset_tld();
+
+ ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init");
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[1] = tld_create_key("value1", sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key");
+ tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key");
+
+ /*
+ * Shouldn't be able to store data exceed a page. Create a TLD just big
+ * enough to exceed a page. TLDs already created are int value0, int
+ * value1, and struct test_tld_struct value2.
+ */
+ key = tld_create_key("value_not_exist",
+ TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1);
+ ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key");
+
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key");
+
+ /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */
+ for (i = 3; i < TLD_MAX_DATA_CNT; i++) {
+ snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i);
+ tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key");
+ }
+ key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key");
+
+ /* Access TLDs from multiple threads and check if they are thread-specific */
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) {
+ err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto out;
+ }
+
+out:
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++)
+ pthread_join(thread[i], NULL);
+
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3)
+
+void *test_task_local_data_race_thread(void *arg)
+{
+ int err = 0, id = (intptr_t)arg;
+ char key_name[32];
+ tld_key_t key;
+
+ key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1);
+ if (tld_key_err_or_zero(key) != -E2BIG) {
+ err = 1;
+ goto out;
+ }
+
+ /* Only one thread will succeed in creating value1 */
+ key = tld_create_key("value1", sizeof(int));
+ if (!tld_key_is_err(key))
+ tld_keys[1] = key;
+
+ /* Only one thread will succeed in creating value2 */
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ if (!tld_key_is_err(key))
+ tld_keys[2] = key;
+
+ snprintf(key_name, 32, "thread_%d", id);
+ tld_keys[id] = tld_create_key(key_name, sizeof(int));
+ if (tld_key_is_err(tld_keys[id]))
+ err = 2;
+out:
+ return (void *)(intptr_t)err;
+}
+
+static void test_task_local_data_race(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ pthread_t thread[TEST_RACE_THREAD_NUM];
+ struct test_task_local_data *skel;
+ int fd, i, j, err, *data;
+ void *ret = NULL;
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[0] = value0_key;
+
+ for (j = 0; j < 100; j++) {
+ reset_tld();
+
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ /*
+ * Try to make tld_create_key() race with each other. Call
+ * tld_create_key(), both valid and invalid, from different threads.
+ */
+ err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread,
+ (void *)(intptr_t)(i + 3));
+ if (CHECK_FAIL(err))
+ break;
+ }
+
+ /* Wait for all tld_create_key() to return */
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ pthread_join(thread[i], &ret);
+ if (CHECK_FAIL(ret))
+ break;
+ }
+
+ /* Write a unique number to each TLD */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(!data))
+ break;
+ *data = i;
+ }
+
+ /* Read TLDs and check the value to see if any address collides with another */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(*data != i))
+ break;
+ }
+
+ /* Run task_main to make sure no invalid TLDs are added */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+ }
+out:
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+void test_task_local_data(void)
+{
+ if (test__start_subtest("task_local_data_basic"))
+ test_task_local_data_basic();
+ if (test__start_subtest("task_local_data_race"))
+ test_task_local_data_race();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
new file mode 100644
index 000000000000..774b31a5f6ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work.skel.h"
+#include "task_work_fail.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+
+static int perf_event_open(__u32 type, __u64 config, int pid)
+{
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .size = sizeof(struct perf_event_attr),
+ .sample_period = 100000,
+ };
+
+ return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
+}
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+static int verify_map(struct bpf_map *map, const char *expected_data)
+{
+ int err;
+ struct elem value;
+ int processed_values = 0;
+ int k, sz;
+
+ sz = bpf_map__max_entries(map);
+ for (k = 0; k < sz; ++k) {
+ err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0);
+ if (err)
+ continue;
+ if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) {
+ fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data,
+ value.data, bpf_map__name(map));
+ return 2;
+ }
+ processed_values++;
+ }
+
+ return processed_values == 0;
+}
+
+static void task_work_run(const char *prog_name, const char *map_name)
+{
+ struct task_work *skel;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ struct bpf_link *link = NULL;
+ int err, pe_fd = -1, pid, status, pipefd[2];
+ char user_string[] = "hello world";
+
+ if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe"))
+ return;
+
+ pid = fork();
+ if (pid == 0) {
+ __u64 num = 1;
+ int i;
+ char buf;
+
+ close(pipefd[1]);
+ read(pipefd[0], &buf, sizeof(buf));
+ close(pipefd[0]);
+
+ for (i = 0; i < 10000; ++i)
+ num *= time(0) % 7;
+ (void)num;
+ exit(0);
+ }
+ if (!ASSERT_GT(pid, 0, "fork() failed")) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return;
+ }
+
+ skel = task_work__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, false);
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "prog_name"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, true);
+ skel->bss->user_ptr = (char *)user_string;
+
+ err = task_work__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid);
+ if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) {
+ fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(prog, pe_fd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ /* perf event fd ownership is passed to bpf_link */
+ pe_fd = -1;
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ /* Wait to collect some samples */
+ waitpid(pid, &status, 0);
+ pid = 0;
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "find map_name"))
+ goto cleanup;
+ if (!ASSERT_OK(verify_map(map, user_string), "verify map"))
+ goto cleanup;
+cleanup:
+ if (pe_fd >= 0)
+ close(pe_fd);
+ bpf_link__destroy(link);
+ task_work__destroy(skel);
+ if (pid > 0) {
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ waitpid(pid, &status, 0);
+ }
+}
+
+void test_task_work(void)
+{
+ if (test__start_subtest("test_task_work_hash_map"))
+ task_work_run("oncpu_hash_map", "hmap");
+
+ if (test__start_subtest("test_task_work_array_map"))
+ task_work_run("oncpu_array_map", "arrmap");
+
+ if (test__start_subtest("test_task_work_lru_map"))
+ task_work_run("oncpu_lru_map", "lrumap");
+
+ RUN_TESTS(task_work_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
index 367f47e4a936..b38c16b4247f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -75,26 +75,26 @@ static void test_set_global_vars_succeeds(void)
" -vl2 > %s", fix->veristat, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
- __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
- __CHECK_STR("_w=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
- __CHECK_STR("_w=0x80000000 ", "var_s32 = -0x80000000");
- __CHECK_STR("_w=0x76543210 ", "var_u32 = 0x76543210");
- __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
- __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
- __CHECK_STR("_w=128 ", "var_s8 = -128");
- __CHECK_STR("_w=255 ", "var_u8 = 255");
- __CHECK_STR("_w=11 ", "var_ea = EA2");
- __CHECK_STR("_w=12 ", "var_eb = EB2");
- __CHECK_STR("_w=13 ", "var_ec = EC2");
- __CHECK_STR("_w=1 ", "var_b = 1");
- __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
- __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa");
- __CHECK_STR("_w=171 ", "arr[3]= 171");
- __CHECK_STR("_w=172 ", "arr[EA2] =172");
- __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3");
- __CHECK_STR("_w=173 ", "matrix[31][7][11]=173");
- __CHECK_STR("_w=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
- __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
+ __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
+ __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
+ __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000");
+ __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210");
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("=128 ", "var_s8 = -128");
+ __CHECK_STR("=255 ", "var_u8 = 255");
+ __CHECK_STR("=11 ", "var_ea = EA2");
+ __CHECK_STR("=12 ", "var_eb = EB2");
+ __CHECK_STR("=13 ", "var_ec = EC2");
+ __CHECK_STR("=1 ", "var_b = 1");
+ __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
+ __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa");
+ __CHECK_STR("=171 ", "arr[3]= 171");
+ __CHECK_STR("=172 ", "arr[EA2] =172");
+ __CHECK_STR("=10 ", "enum_arr[EC2]=EA3");
+ __CHECK_STR("=173 ", "matrix[31][7][11]=173");
+ __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
+ __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
out:
teardown_fixture(fix);
@@ -117,8 +117,8 @@ static void test_set_global_vars_from_file_succeeds(void)
SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s",
fix->veristat, input_file, fix->tmpfile);
read(fix->fd, fix->output, fix->sz);
- __CHECK_STR("_w=0x8000 ", "var_s16 = -32768");
- __CHECK_STR("_w=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
out:
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index d66687f1ee6a..34f9ccce2602 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -3,6 +3,7 @@
#include <test_progs.h>
#include "timer.skel.h"
#include "timer_failure.skel.h"
+#include "timer_interrupt.skel.h"
#define NUM_THR 8
@@ -86,6 +87,10 @@ void serial_test_timer(void)
int err;
timer_skel = timer__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
return;
@@ -95,3 +100,36 @@ void serial_test_timer(void)
RUN_TESTS(timer_failure);
}
+
+void test_timer_interrupt(void)
+{
+ struct timer_interrupt *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_interrupt__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load"))
+ return;
+
+ err = timer_interrupt__attach(skel);
+ if (!ASSERT_OK(err, "timer_interrupt__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+
+ usleep(50);
+
+ ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt");
+ if (skel->bss->preempt_count)
+ ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb");
+
+out:
+ timer_interrupt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
index f74b82305da8..b841597c8a3a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
@@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode)
struct timer_crash *skel;
skel = timer_crash__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load"))
return;
skel->bss->pid = getpid();
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
index 1a2f99596916..eb303fa1e09a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -59,6 +59,10 @@ void test_timer_lockup(void)
}
skel = timer_lockup__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 9ff7843909e7..c930c7d7105b 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -65,6 +65,10 @@ void serial_test_timer_mim(void)
goto cleanup;
timer_skel = timer_mim__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
index 19e68d4b3532..6f8c0bfb0415 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -112,10 +112,39 @@ destroy_skel:
tracing_struct_many_args__destroy(skel);
}
+static void test_union_args(void)
+{
+ struct tracing_struct *skel;
+ int err;
+
+ skel = tracing_struct__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+ return;
+
+ err = tracing_struct__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct__attach"))
+ goto out;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a");
+ ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b");
+ ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c");
+
+ ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a");
+ ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a");
+ ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b");
+
+out:
+ tracing_struct__destroy(skel);
+}
+
void test_tracing_struct(void)
{
if (test__start_subtest("struct_args"))
test_struct_args();
if (test__start_subtest("struct_many_args"))
test_struct_many_args();
+ if (test__start_subtest("union_args"))
+ test_union_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
index cf3e0e7a64fa..86404476c1da 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2023 Hengqi Chen */
#include <test_progs.h>
+#include <asm/ptrace.h>
#include "test_uprobe.skel.h"
static FILE *urand_spawn(int *pid)
@@ -33,7 +34,7 @@ static int urand_trigger(FILE **urand_pipe)
return exit_code;
}
-void test_uprobe(void)
+static void test_uprobe_attach(void)
{
LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
struct test_uprobe *skel;
@@ -93,3 +94,156 @@ cleanup:
pclose(urand_pipe);
test_uprobe__destroy(skel);
}
+
+#ifdef __x86_64__
+__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void)
+{
+ asm volatile (
+ "ret\n"
+ );
+}
+
+static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after)
+{
+ asm volatile (
+ "movq %r11, 48(%rdi)\n"
+ "movq %r10, 56(%rdi)\n"
+ "movq %r9, 64(%rdi)\n"
+ "movq %r8, 72(%rdi)\n"
+ "movq %rax, 80(%rdi)\n"
+ "movq %rcx, 88(%rdi)\n"
+ "movq %rdx, 96(%rdi)\n"
+ "movq %rsi, 104(%rdi)\n"
+ "movq %rdi, 112(%rdi)\n"
+
+ /* save 2nd argument */
+ "pushq %rsi\n"
+ "call uprobe_regs_change_trigger\n"
+
+ /* save return value and load 2nd argument pointer to rax */
+ "pushq %rax\n"
+ "movq 8(%rsp), %rax\n"
+
+ "movq %r11, 48(%rax)\n"
+ "movq %r10, 56(%rax)\n"
+ "movq %r9, 64(%rax)\n"
+ "movq %r8, 72(%rax)\n"
+ "movq %rcx, 88(%rax)\n"
+ "movq %rdx, 96(%rax)\n"
+ "movq %rsi, 104(%rax)\n"
+ "movq %rdi, 112(%rax)\n"
+
+ /* restore return value and 2nd argument */
+ "pop %rax\n"
+ "pop %rsi\n"
+
+ "movq %rax, 80(%rsi)\n"
+ "ret\n"
+ );
+}
+
+static void regs_common(void)
+{
+ struct pt_regs before = {}, after = {}, expected = {
+ .rax = 0xc0ffe,
+ .rcx = 0xbad,
+ .rdx = 0xdead,
+ .r8 = 0x8,
+ .r9 = 0x9,
+ .r10 = 0x10,
+ .r11 = 0x11,
+ .rdi = 0x12,
+ .rsi = 0x13,
+ };
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->regs = expected;
+
+ uprobe_opts.func_name = "uprobe_regs_change_trigger";
+ skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ uprobe_regs_change(&before, &after);
+
+ ASSERT_EQ(after.rax, expected.rax, "ax");
+ ASSERT_EQ(after.rcx, expected.rcx, "cx");
+ ASSERT_EQ(after.rdx, expected.rdx, "dx");
+ ASSERT_EQ(after.r8, expected.r8, "r8");
+ ASSERT_EQ(after.r9, expected.r9, "r9");
+ ASSERT_EQ(after.r10, expected.r10, "r10");
+ ASSERT_EQ(after.r11, expected.r11, "r11");
+ ASSERT_EQ(after.rdi, expected.rdi, "rdi");
+ ASSERT_EQ(after.rsi, expected.rsi, "rsi");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static noinline unsigned long uprobe_regs_change_ip_1(void)
+{
+ return 0xc0ffee;
+}
+
+static noinline unsigned long uprobe_regs_change_ip_2(void)
+{
+ return 0xdeadbeef;
+}
+
+static void regs_ip(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ unsigned long ret;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2;
+
+ uprobe_opts.func_name = "uprobe_regs_change_ip_1";
+ skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts(
+ skel->progs.test_regs_change_ip,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ ret = uprobe_regs_change_ip_1();
+ ASSERT_EQ(ret, 0xdeadbeef, "ret");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static void test_uprobe_regs_change(void)
+{
+ if (test__start_subtest("regs_change_common"))
+ regs_common();
+ if (test__start_subtest("regs_change_ip"))
+ regs_ip();
+}
+#else
+static void test_uprobe_regs_change(void) { }
+#endif
+
+void test_uprobe(void)
+{
+ if (test__start_subtest("attach"))
+ test_uprobe_attach();
+ test_uprobe_regs_change();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index b17dc39a23db..6d75ede16e7c 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -8,22 +8,31 @@
#include <asm/ptrace.h>
#include <linux/compiler.h>
#include <linux/stringify.h>
+#include <linux/kernel.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
#include <asm/prctl.h>
#include "uprobe_syscall.skel.h"
#include "uprobe_syscall_executed.skel.h"
+#include "bpf/libbpf_internal.h"
-__naked unsigned long uretprobe_regs_trigger(void)
+#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00
+#include "usdt.h"
+
+#pragma GCC diagnostic ignored "-Wattributes"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void)
{
asm volatile (
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */
"movq $0xdeadbeef, %rax\n"
"ret\n"
);
}
-__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
+__naked void uprobe_regs(struct pt_regs *before, struct pt_regs *after)
{
asm volatile (
"movq %r15, 0(%rdi)\n"
@@ -44,15 +53,17 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
"movq $0, 120(%rdi)\n" /* orig_rax */
"movq $0, 128(%rdi)\n" /* rip */
"movq $0, 136(%rdi)\n" /* cs */
+ "pushq %rax\n"
"pushf\n"
"pop %rax\n"
"movq %rax, 144(%rdi)\n" /* eflags */
+ "pop %rax\n"
"movq %rsp, 152(%rdi)\n" /* rsp */
"movq $0, 160(%rdi)\n" /* ss */
/* save 2nd argument */
"pushq %rsi\n"
- "call uretprobe_regs_trigger\n"
+ "call uprobe_regs_trigger\n"
/* save return value and load 2nd argument pointer to rax */
"pushq %rax\n"
@@ -92,25 +103,37 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
);
}
-static void test_uretprobe_regs_equal(void)
+static void test_uprobe_regs_equal(bool retprobe)
{
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = retprobe,
+ );
struct uprobe_syscall *skel = NULL;
struct pt_regs before = {}, after = {};
unsigned long *pb = (unsigned long *) &before;
unsigned long *pa = (unsigned long *) &after;
unsigned long *pp;
+ unsigned long offset;
unsigned int i, cnt;
- int err;
+
+ offset = get_uprobe_offset(&uprobe_regs_trigger);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return;
skel = uprobe_syscall__open_and_load();
if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load"))
goto cleanup;
- err = uprobe_syscall__attach(skel);
- if (!ASSERT_OK(err, "uprobe_syscall__attach"))
+ skel->links.probe = bpf_program__attach_uprobe_opts(skel->progs.probe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.probe, "bpf_program__attach_uprobe_opts"))
goto cleanup;
- uretprobe_regs(&before, &after);
+ /* make sure uprobe gets optimized */
+ if (!retprobe)
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
pp = (unsigned long *) &skel->bss->regs;
cnt = sizeof(before)/sizeof(*pb);
@@ -119,7 +142,7 @@ static void test_uretprobe_regs_equal(void)
unsigned int offset = i * sizeof(unsigned long);
/*
- * Check register before and after uretprobe_regs_trigger call
+ * Check register before and after uprobe_regs_trigger call
* that triggers the uretprobe.
*/
switch (offset) {
@@ -133,7 +156,7 @@ static void test_uretprobe_regs_equal(void)
/*
* Check register seen from bpf program and register after
- * uretprobe_regs_trigger call
+ * uprobe_regs_trigger call (with rax exception, check below).
*/
switch (offset) {
/*
@@ -146,6 +169,15 @@ static void test_uretprobe_regs_equal(void)
case offsetof(struct pt_regs, rsp):
case offsetof(struct pt_regs, ss):
break;
+ /*
+ * uprobe does not see return value in rax, it needs to see the
+ * original (before) rax value
+ */
+ case offsetof(struct pt_regs, rax):
+ if (!retprobe) {
+ ASSERT_EQ(pp[i], pb[i], "uprobe rax prog-before value check");
+ break;
+ }
default:
if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check"))
fprintf(stdout, "failed register offset %u\n", offset);
@@ -175,7 +207,7 @@ static int write_bpf_testmod_uprobe(unsigned long offset)
return ret != n ? (int) ret : 0;
}
-static void test_uretprobe_regs_change(void)
+static void test_regs_change(void)
{
struct pt_regs before = {}, after = {};
unsigned long *pb = (unsigned long *) &before;
@@ -183,13 +215,16 @@ static void test_uretprobe_regs_change(void)
unsigned long cnt = sizeof(before)/sizeof(*pb);
unsigned int i, err, offset;
- offset = get_uprobe_offset(uretprobe_regs_trigger);
+ offset = get_uprobe_offset(uprobe_regs_trigger);
err = write_bpf_testmod_uprobe(offset);
if (!ASSERT_OK(err, "register_uprobe"))
return;
- uretprobe_regs(&before, &after);
+ /* make sure uprobe gets optimized */
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
err = write_bpf_testmod_uprobe(0);
if (!ASSERT_OK(err, "unregister_uprobe"))
@@ -252,6 +287,7 @@ static void test_uretprobe_syscall_call(void)
);
struct uprobe_syscall_executed *skel;
int pid, status, err, go[2], c = 0;
+ struct bpf_link *link;
if (!ASSERT_OK(pipe(go), "pipe"))
return;
@@ -277,11 +313,14 @@ static void test_uretprobe_syscall_call(void)
_exit(0);
}
- skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid,
- "/proc/self/exe",
- "uretprobe_syscall_call", &opts);
- if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi"))
+ skel->bss->pid = pid;
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ pid, "/proc/self/exe",
+ "uretprobe_syscall_call", &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
goto cleanup;
+ skel->links.test_uretprobe_multi = link;
/* kick the child */
write(go[1], &c, 1);
@@ -301,6 +340,256 @@ cleanup:
close(go[0]);
}
+#define TRAMP "[uprobes-trampoline]"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked void uprobe_test(void)
+{
+ asm volatile (" \n"
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
+ "ret \n"
+ );
+}
+
+__attribute__((aligned(16)))
+__nocf_check __weak void usdt_test(void)
+{
+ USDT(optimized_uprobe, usdt);
+}
+
+static int find_uprobes_trampoline(void *tramp_addr)
+{
+ void *start, *end;
+ char line[128];
+ int ret = -1;
+ FILE *maps;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ fprintf(stderr, "cannot open maps\n");
+ return -1;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ int m = -1;
+
+ /* We care only about private r-x mappings. */
+ if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", &start, &end, &m) != 2)
+ continue;
+ if (m < 0)
+ continue;
+ if (!strncmp(&line[m], TRAMP, sizeof(TRAMP)-1) && (start == tramp_addr)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ fclose(maps);
+ return ret;
+}
+
+static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+
+static void *find_nop5(void *fn)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (!memcmp(nop5, fn + i, 5))
+ return fn + i;
+ }
+ return NULL;
+}
+
+typedef void (__attribute__((nocf_check)) *trigger_t)(void);
+
+static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigger,
+ void *addr, int executed)
+{
+ struct __arch_relative_insn {
+ __u8 op;
+ __s32 raddr;
+ } __packed *call;
+ void *tramp = NULL;
+
+ /* Uprobe gets optimized after first trigger, so let's press twice. */
+ trigger();
+ trigger();
+
+ /* Make sure bpf program got executed.. */
+ ASSERT_EQ(skel->bss->executed, executed, "executed");
+
+ /* .. and check the trampoline is as expected. */
+ call = (struct __arch_relative_insn *) addr;
+ tramp = (void *) (call + 1) + call->raddr;
+ ASSERT_EQ(call->op, 0xe8, "call");
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+
+ return tramp;
+}
+
+static void check_detach(void *addr, void *tramp)
+{
+ /* [uprobes_trampoline] stays after detach */
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+ ASSERT_OK(memcmp(addr, nop5, 5), "nop5");
+}
+
+static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link,
+ trigger_t trigger, void *addr, int executed)
+{
+ void *tramp;
+
+ tramp = check_attach(skel, trigger, addr, executed);
+ bpf_link__destroy(link);
+ check_detach(addr, tramp);
+}
+
+static void test_uprobe_legacy(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ /* uprobe */
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe */
+ skel->bss->executed = 0;
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uretprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_multi(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ /* uprobe.multi */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe.multi */
+ skel->bss->executed = 0;
+ opts.retprobe = true;
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_session(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .session = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 4);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_usdt(void)
+{
+ struct uprobe_syscall_executed *skel;
+ struct bpf_link *link;
+ void *addr;
+
+ errno = 0;
+ addr = find_nop5(usdt_test);
+ if (!ASSERT_OK_PTR(addr, "find_nop5"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_usdt(skel->progs.test_usdt,
+ -1 /* all PIDs */, "/proc/self/exe",
+ "optimized_uprobe", "usdt", NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ check(skel, link, usdt_test, addr, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
/*
* Borrowed from tools/testing/selftests/x86/test_shadow_stack.c.
*
@@ -343,43 +632,172 @@ static void test_uretprobe_shadow_stack(void)
return;
}
- /* Run all of the uretprobe tests. */
- test_uretprobe_regs_equal();
- test_uretprobe_regs_change();
+ /* Run all the tests with shadow stack in place. */
+
+ test_uprobe_regs_equal(false);
+ test_uprobe_regs_equal(true);
test_uretprobe_syscall_call();
+ test_uprobe_legacy();
+ test_uprobe_multi();
+ test_uprobe_session();
+ test_uprobe_usdt();
+
+ test_regs_change();
+
ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
}
-#else
-static void test_uretprobe_regs_equal(void)
+
+static volatile bool race_stop;
+
+static USDT_DEFINE_SEMA(race);
+
+static void *worker_trigger(void *arg)
{
- test__skip();
+ unsigned long rounds = 0;
+
+ while (!race_stop) {
+ uprobe_test();
+ rounds++;
+ }
+
+ printf("tid %d trigger rounds: %lu\n", gettid(), rounds);
+ return NULL;
}
-static void test_uretprobe_regs_change(void)
+static void *worker_attach(void *arg)
{
- test__skip();
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct uprobe_syscall_executed *skel;
+ unsigned long rounds = 0, offset;
+ const char *sema[2] = {
+ __stringify(USDT_SEMA(race)),
+ NULL,
+ };
+ unsigned long *ref;
+ int err;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return NULL;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, (const char **) &sema, &ref, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema"))
+ return NULL;
+
+ opts.ref_ctr_offset = *ref;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ while (!race_stop) {
+ skel->links.test_uprobe = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.test_uprobe, "bpf_program__attach_uprobe_opts"))
+ break;
+
+ bpf_link__destroy(skel->links.test_uprobe);
+ skel->links.test_uprobe = NULL;
+ rounds++;
+ }
+
+ printf("tid %d attach rounds: %lu hits: %d\n", gettid(), rounds, skel->bss->executed);
+ uprobe_syscall_executed__destroy(skel);
+ free(ref);
+ return NULL;
}
-static void test_uretprobe_syscall_call(void)
+static useconds_t race_msec(void)
{
- test__skip();
+ char *env;
+
+ env = getenv("BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC");
+ if (env)
+ return atoi(env);
+
+ /* default duration is 500ms */
+ return 500;
}
-static void test_uretprobe_shadow_stack(void)
+static void test_uprobe_race(void)
{
- test__skip();
+ int err, i, nr_threads;
+ pthread_t *threads;
+
+ nr_threads = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(nr_threads, 0, "libbpf_num_possible_cpus"))
+ return;
+ nr_threads = max(2, nr_threads);
+
+ threads = alloca(sizeof(*threads) * nr_threads);
+ if (!ASSERT_OK_PTR(threads, "malloc"))
+ return;
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_create(&threads[i], NULL, i % 2 ? worker_trigger : worker_attach,
+ NULL);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto cleanup;
+ }
+
+ usleep(race_msec() * 1000);
+
+cleanup:
+ race_stop = true;
+ for (nr_threads = i, i = 0; i < nr_threads; i++)
+ pthread_join(threads[i], NULL);
+
+ ASSERT_FALSE(USDT_SEMA_IS_ACTIVE(race), "race_semaphore");
}
+
+#ifndef __NR_uprobe
+#define __NR_uprobe 336
#endif
-void test_uprobe_syscall(void)
+static void test_uprobe_error(void)
+{
+ long err = syscall(__NR_uprobe);
+
+ ASSERT_EQ(err, -1, "error");
+ ASSERT_EQ(errno, ENXIO, "errno");
+}
+
+static void __test_uprobe_syscall(void)
{
if (test__start_subtest("uretprobe_regs_equal"))
- test_uretprobe_regs_equal();
- if (test__start_subtest("uretprobe_regs_change"))
- test_uretprobe_regs_change();
+ test_uprobe_regs_equal(true);
if (test__start_subtest("uretprobe_syscall_call"))
test_uretprobe_syscall_call();
if (test__start_subtest("uretprobe_shadow_stack"))
test_uretprobe_shadow_stack();
+ if (test__start_subtest("uprobe_legacy"))
+ test_uprobe_legacy();
+ if (test__start_subtest("uprobe_multi"))
+ test_uprobe_multi();
+ if (test__start_subtest("uprobe_session"))
+ test_uprobe_session();
+ if (test__start_subtest("uprobe_usdt"))
+ test_uprobe_usdt();
+ if (test__start_subtest("uprobe_race"))
+ test_uprobe_race();
+ if (test__start_subtest("uprobe_error"))
+ test_uprobe_error();
+ if (test__start_subtest("uprobe_regs_equal"))
+ test_uprobe_regs_equal(false);
+ if (test__start_subtest("regs_change"))
+ test_regs_change();
+}
+#else
+static void __test_uprobe_syscall(void)
+{
+ test__skip();
+}
+#endif
+
+void test_uprobe_syscall(void)
+{
+ __test_uprobe_syscall();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 9057e983cc54..4f7f45e69315 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -40,12 +40,79 @@ static void __always_inline trigger_func(int x) {
}
}
-static void subtest_basic_usdt(void)
+#if defined(__x86_64__) || defined(__i386__)
+/*
+ * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
+ * - 'size' is the size in bytes of the array element, and its sign indicates
+ * whether the type is signed (negative) or unsigned (positive).
+ * - 'base_reg' is the register holding the base address, normally rdx or edx
+ * - 'index_reg' is the register holding the index, normally rax or eax
+ * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
+ * size of the element type.
+ *
+ * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
+ * - size: -2 (negative because 'short' is signed)
+ * - scale: 2 (since sizeof(short) == 2)
+ *
+ * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
+ */
+static volatile short array[] = {-1, -2, -3, -4};
+
+#if defined(__x86_64__)
+#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
+#else
+#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
+#endif
+
+unsigned short test_usdt_sib_semaphore SEC(".probes");
+
+static void trigger_sib_spec(void)
+{
+ /*
+ * Force SIB addressing with inline assembly.
+ *
+ * You must compile with -std=gnu99 or -std=c99 to use the
+ * STAP_PROBE_ASM macro.
+ *
+ * The STAP_PROBE_ASM macro generates a quoted string that gets
+ * inserted between the surrounding assembly instructions. In this
+ * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction
+ * stream, creating a probe point between the asm statement boundaries.
+ * It works fine with gcc/clang.
+ *
+ * Register constraints:
+ * - "d"(array): Binds the 'array' variable to %rdx or %edx register
+ * - "a"(0): Binds the constant 0 to %rax or %eax register
+ * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and
+ * %%rax(%eax), they contain the expected values for SIB addressing.
+ *
+ * The "memory" clobber prevents the compiler from reordering memory
+ * accesses around the probe point, ensuring that the probe behavior
+ * is predictable and consistent.
+ */
+ asm volatile(
+ STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
+ :
+ : "d"(array), "a"(0)
+ : "memory"
+ );
+}
+#endif
+
+static void subtest_basic_usdt(bool optimized)
{
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
- int err, i;
+ int err, i, called;
+ const __u64 expected_cookie = 0xcafedeadbeeffeed;
+
+#define TRIGGER(x) ({ \
+ trigger_func(x); \
+ if (optimized) \
+ trigger_func(x); \
+ optimized ? 2 : 1; \
+ })
skel = test_usdt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -59,20 +126,29 @@ static void subtest_basic_usdt(void)
goto cleanup;
/* usdt0 won't be auto-attached */
- opts.usdt_cookie = 0xcafedeadbeeffeed;
+ opts.usdt_cookie = expected_cookie;
skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
0 /*self*/, "/proc/self/exe",
"test", "usdt0", &opts);
if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
goto cleanup;
- trigger_func(1);
+#if defined(__x86_64__) || defined(__i386__)
+ opts.usdt_cookie = expected_cookie;
+ skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt_sib", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
+ goto cleanup;
+#endif
+
+ alled = TRIGGER(1);
- ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
- ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
- ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
- ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
@@ -119,11 +195,11 @@ static void subtest_basic_usdt(void)
* bpf_program__attach_usdt() handles this properly and attaches to
* all possible places of USDT invocation.
*/
- trigger_func(2);
+ called += TRIGGER(2);
- ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
- ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
- ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
/* only check values that depend on trigger_func()'s input value */
ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
@@ -142,9 +218,9 @@ static void subtest_basic_usdt(void)
if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
goto cleanup;
- trigger_func(3);
+ called += TRIGGER(3);
- ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
/* this time usdt3 has custom cookie */
ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
@@ -156,8 +232,19 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+#if defined(__x86_64__) || defined(__i386__)
+ trigger_sib_spec();
+ ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
+ ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
+ ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
+ ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
+ ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
+ ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
+#endif
+
cleanup:
test_usdt__destroy(skel);
+#undef TRIGGER
}
unsigned short test_usdt_100_semaphore SEC(".probes");
@@ -425,7 +512,11 @@ cleanup:
void test_usdt(void)
{
if (test__start_subtest("basic"))
- subtest_basic_usdt();
+ subtest_basic_usdt(false);
+#ifdef __x86_64__
+ if (test__start_subtest("basic_optimized"))
+ subtest_basic_usdt(true);
+#endif
if (test__start_subtest("multispec"))
subtest_multispec_usdt();
if (test__start_subtest("urand_auto_attach"))
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 77ec95d4ffaa..28e81161e6fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -46,6 +46,7 @@
#include "verifier_ldsx.skel.h"
#include "verifier_leak_ptr.skel.h"
#include "verifier_linked_scalars.skel.h"
+#include "verifier_live_stack.skel.h"
#include "verifier_load_acquire.skel.h"
#include "verifier_loops1.skel.h"
#include "verifier_lwt.skel.h"
@@ -59,6 +60,7 @@
#include "verifier_meta_access.skel.h"
#include "verifier_movsx.skel.h"
#include "verifier_mtu.skel.h"
+#include "verifier_mul.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
#include "verifier_bpf_fastcall.skel.h"
@@ -183,6 +185,7 @@ void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); }
+void test_verifier_live_stack(void) { RUN(verifier_live_stack); }
void test_verifier_loops1(void) { RUN(verifier_loops1); }
void test_verifier_lwt(void) { RUN(verifier_lwt); }
void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); }
@@ -194,6 +197,7 @@ void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); }
void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); }
void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
+void test_verifier_mul(void) { RUN(verifier_mul); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index b9d9f0a502ce..178292d1251a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -9,6 +9,7 @@
#define TX_NETNS "xdp_context_tx"
#define RX_NETNS "xdp_context_rx"
#define TAP_NAME "tap0"
+#define DUMMY_NAME "dum0"
#define TAP_NETNS "xdp_context_tuntap"
#define TEST_PAYLOAD_LEN 32
@@ -96,9 +97,7 @@ void test_xdp_context_test_run(void)
/* Meta data must be 255 bytes or smaller */
test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
- /* Total size of data must match data_end - data_meta */
- test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
- sizeof(data) - 1, 0, 0, 0);
+ /* Total size of data must be data_end - data_meta or larger */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) + 1, 0, 0, 0);
@@ -156,15 +155,30 @@ err:
return -1;
}
-static void assert_test_result(struct test_xdp_meta *skel)
+static int write_test_packet(int tap_fd)
+{
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ int n;
+
+ /* The ethernet header doesn't need to be valid for this test */
+ memset(packet, 0, sizeof(struct ethhdr));
+ memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
+
+ n = write(tap_fd, packet, sizeof(packet));
+ if (!ASSERT_EQ(n, sizeof(packet), "write packet"))
+ return -1;
+
+ return 0;
+}
+
+static void assert_test_result(const struct bpf_map *result_map)
{
int err;
__u32 map_key = 0;
__u8 map_value[TEST_PAYLOAD_LEN];
- err = bpf_map__lookup_elem(skel->maps.test_result, &map_key,
- sizeof(map_key), &map_value,
- TEST_PAYLOAD_LEN, BPF_ANY);
+ err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key),
+ &map_value, TEST_PAYLOAD_LEN, BPF_ANY);
if (!ASSERT_OK(err, "lookup test_result"))
return;
@@ -172,6 +186,18 @@ static void assert_test_result(struct test_xdp_meta *skel)
"test_result map contains test payload");
}
+static bool clear_test_result(struct bpf_map *result_map)
+{
+ const __u8 v[sizeof(test_payload)] = {};
+ const __u32 k = 0;
+ int err;
+
+ err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY);
+ ASSERT_OK(err, "update test_result");
+
+ return err == 0;
+}
+
void test_xdp_context_veth(void)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
@@ -248,7 +274,7 @@ void test_xdp_context_veth(void)
if (!ASSERT_OK(ret, "send_test_packet"))
goto close;
- assert_test_result(skel);
+ assert_test_result(skel->maps.test_result);
close:
close_netns(nstoken);
@@ -257,17 +283,21 @@ close:
netns_free(tx_ns);
}
-void test_xdp_context_tuntap(void)
+static void test_tuntap(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prio_1_prog,
+ struct bpf_program *tc_prio_2_prog,
+ struct bpf_map *result_map)
{
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
struct netns_obj *ns = NULL;
- struct test_xdp_meta *skel = NULL;
- __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
int tap_fd = -1;
int tap_ifindex;
int ret;
+ if (!clear_test_result(result_map))
+ return;
+
ns = netns_new(TAP_NETNS, true);
if (!ASSERT_OK_PTR(ns, "create and open ns"))
return;
@@ -278,10 +308,6 @@ void test_xdp_context_tuntap(void)
SYS(close, "ip link set dev " TAP_NAME " up");
- skel = test_xdp_meta__open_and_load();
- if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
- goto close;
-
tap_ifindex = if_nametoindex(TAP_NAME);
if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
goto close;
@@ -291,33 +317,175 @@ void test_xdp_context_tuntap(void)
if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
goto close;
- tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls);
+ tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog);
ret = bpf_tc_attach(&tc_hook, &tc_opts);
if (!ASSERT_OK(ret, "bpf_tc_attach"))
goto close;
- ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp),
+ if (tc_prio_2_prog) {
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2,
+ .prog_fd = bpf_program__fd(tc_prio_2_prog));
+
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+ }
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog),
0, NULL);
if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
goto close;
- /* The ethernet header is not relevant for this test and doesn't need to
- * be meaningful.
- */
- struct ethhdr eth = { 0 };
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
- memcpy(packet, &eth, sizeof(eth));
- memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+ assert_test_result(result_map);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
+static void test_tuntap_mirred(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int dummy_ifindex;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ return;
- ret = write(tap_fd, packet, sizeof(packet));
- if (!ASSERT_EQ(ret, sizeof(packet), "write packet"))
+ /* Setup dummy interface */
+ SYS(close, "ip link add name " DUMMY_NAME " type dummy");
+ SYS(close, "ip link set dev " DUMMY_NAME " up");
+
+ dummy_ifindex = if_nametoindex(DUMMY_NAME);
+ if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
goto close;
- assert_test_result(skel);
+ tc_hook.ifindex = dummy_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ /* Setup TAP interface */
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ /* Copy all packets received from TAP to dummy ingress */
+ SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
+ SYS(close, "tc filter add dev " TAP_NAME " ingress "
+ "protocol all matchall "
+ "action mirred ingress mirror dev " DUMMY_NAME);
+
+ /* Receive a packet on TAP */
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ ASSERT_TRUE(*test_pass, "test_pass");
close:
if (tap_fd >= 0)
close(tap_fd);
- test_xdp_meta__destroy(skel);
netns_free(ns);
}
+
+void test_xdp_context_tuntap(void)
+{
+ struct test_xdp_meta *skel = NULL;
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ return;
+
+ if (test__start_subtest("data_meta"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_read"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_read,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_slice"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_slice,
+ NULL, /* tc prio 2 */
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_write"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_write,
+ skel->progs.ing_cls_dynptr_read,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_slice_rdwr"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_slice_rdwr,
+ skel->progs.ing_cls_dynptr_slice,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_offset"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_offset_wr,
+ skel->progs.ing_cls_dynptr_offset_rd,
+ skel->maps.test_result);
+ if (test__start_subtest("dynptr_offset_oob"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_offset_oob,
+ skel->progs.ing_cls,
+ skel->maps.test_result);
+ if (test__start_subtest("clone_data_meta_empty_on_data_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_empty_on_data_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_empty_on_meta_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_empty_on_meta_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_empty_on_data_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_empty_on_data_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_empty_on_meta_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_rdonly_before_data_dynptr_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write,
+ &skel->bss->test_pass);
+
+ test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 461ab18705d5..a8ab05216c38 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -7,6 +7,7 @@
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_devmap_tailcall.skel.h"
#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
@@ -107,6 +108,29 @@ static void test_neg_xdp_devmap_helpers(void)
}
}
+static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev,
+ enum bpf_attach_type prog_tail,
+ bool expect_reject)
+{
+ struct test_xdp_devmap_tailcall *skel;
+ int err;
+
+ skel = test_xdp_devmap_tailcall__open();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open"))
+ return;
+
+ bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev);
+ bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail);
+
+ err = test_xdp_devmap_tailcall__load(skel);
+ if (expect_reject)
+ ASSERT_ERR(err, "test_xdp_devmap_tailcall__load");
+ else
+ ASSERT_OK(err, "test_xdp_devmap_tailcall__load");
+
+ test_xdp_devmap_tailcall__destroy(skel);
+}
+
static void test_xdp_with_devmap_frags_helpers(void)
{
struct test_xdp_with_devmap_frags_helpers *skel;
@@ -238,8 +262,13 @@ void serial_test_xdp_devmap_attach(void)
if (test__start_subtest("DEVMAP with frags programs in entries"))
test_xdp_with_devmap_frags_helpers();
- if (test__start_subtest("Verifier check of DEVMAP programs"))
+ if (test__start_subtest("Verifier check of DEVMAP programs")) {
test_neg_xdp_devmap_helpers();
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false);
+ test_xdp_devmap_tailcall(0, 0, true);
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true);
+ test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true);
+ }
if (test__start_subtest("DEVMAP with programs in entries on veth"))
test_xdp_with_devmap_helpers_veth();
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
new file mode 100644
index 000000000000..efa350d04ec5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_pull_data.skel.h"
+
+#define PULL_MAX (1 << 31)
+#define PULL_PLUS_ONE (1 << 30)
+
+#define XDP_PACKET_HEADROOM 256
+
+/* Find headroom and tailroom occupied by struct xdp_frame and struct
+ * skb_shared_info so that we can calculate the maximum pull lengths for
+ * test cases. They might not be the real size of the structures due to
+ * cache alignment.
+ */
+static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(frame_sz, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return -ENOMEM;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = frame_sz;
+ topts.data_size_out = frame_sz;
+ /* Pass a data_end larger than the linear space available to make sure
+ * bpf_prog_test_run_xdp() will fill the linear data area so that
+ * xdp_find_sizes can infer the size of struct skb_shared_info
+ */
+ ctx.data_end = frame_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ free(buf);
+
+ return err;
+}
+
+/* xdp_pull_data_prog will directly read a marker 0xbb stored at buf[1024]
+ * so caller expecting XDP_PASS should always pass pull_len no less than 1024
+ */
+static void run_test(struct test_xdp_pull_data *skel, int retval,
+ int frame_sz, int buff_len, int meta_len, int data_len,
+ int pull_len)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(buff_len, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return;
+
+ buf[meta_len + 1023] = 0xaa;
+ buf[meta_len + 1024] = 0xbb;
+ buf[meta_len + 1025] = 0xcc;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = buff_len;
+ topts.data_size_out = buff_len;
+ ctx.data = meta_len;
+ ctx.data_end = meta_len + data_len;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ skel->bss->data_len = data_len;
+ if (pull_len & PULL_MAX) {
+ int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz;
+ int tailroom = frame_sz - XDP_PACKET_HEADROOM -
+ data_len - skel->bss->sinfo_sz;
+
+ pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0;
+ pull_len += headroom + tailroom + data_len;
+ }
+ skel->bss->pull_len = pull_len;
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval");
+
+ if (retval == XDP_DROP)
+ goto out;
+
+ ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size");
+ ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size");
+ /* Make sure data around xdp->data_end was not messed up by
+ * bpf_xdp_pull_data()
+ */
+ ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]");
+ ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]");
+ ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]");
+out:
+ free(buf);
+}
+
+static void test_xdp_pull_data_basic(void)
+{
+ u32 pg_sz, max_meta_len, max_data_len;
+ struct test_xdp_pull_data *skel;
+
+ skel = test_xdp_pull_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
+ return;
+
+ pg_sz = sysconf(_SC_PAGE_SIZE);
+
+ if (find_xdp_sizes(skel, pg_sz))
+ goto out;
+
+ max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz;
+ max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz;
+
+ /* linear xdp pkt, pull 0 byte */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048);
+
+ /* multi-buf pkt, pull results in linear xdp pkt */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048);
+
+ /* multi-buf pkt, pull 1 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025);
+
+ /* multi-buf pkt, pull 0 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
+
+ /* multi-buf pkt, empty linear data area, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
+
+ /* multi-buf pkt, no tailroom, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
+
+ /* Test cases with invalid pull length */
+
+ /* linear xdp pkt, pull more than total data len */
+ run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
+
+ /* multi-buf pkt with no space left in linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, empty linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no tailroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+out:
+ test_xdp_pull_data__destroy(skel);
+}
+
+void test_xdp_pull_data(void)
+{
+ if (test__start_subtest("xdp_pull_data"))
+ test_xdp_pull_data_basic();
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
index a52feff98112..d1841aac94a2 100644
--- a/tools/testing/selftests/bpf/progs/arena_atomics.c
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -28,7 +28,8 @@ bool skip_all_tests = true;
#if defined(ENABLE_ATOMICS_TESTS) && \
defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false;
#else
bool skip_lacq_srel_tests = true;
@@ -314,7 +315,8 @@ int load_acquire(const void *ctx)
{
#if defined(ENABLE_ATOMICS_TESTS) && \
defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \
{ asm volatile ( \
@@ -365,7 +367,8 @@ int store_release(const void *ctx)
{
#if defined(ENABLE_ATOMICS_TESTS) && \
defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
- (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86))
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \
{ asm volatile ( \
diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
index c4500c37f85e..086b57a426cf 100644
--- a/tools/testing/selftests/bpf/progs/arena_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
@@ -37,8 +37,11 @@ int prog(void *ctx)
#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
unsigned long flags;
- if ((ret = arena_spin_lock_irqsave(&lock, flags)))
+ if ((ret = arena_spin_lock_irqsave(&lock, flags))) {
+ if (ret == -EOPNOTSUPP)
+ test_skip = 3;
return ret;
+ }
if (counter != limit)
counter++;
bpf_repeat(cs_count);
diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
index d67466c1ff77..f90531cf3ee5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -302,7 +302,7 @@ int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val
* barriers.
*/
if (val & _Q_LOCKED_MASK)
- smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
+ (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
/*
* take ownership and clear the pending bit.
@@ -380,7 +380,7 @@ queue:
/* Link @node into the waitqueue. */
WRITE_ONCE(prev->next, node);
- arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
+ (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
/*
* While waiting for the MCS lock, the next pointer may have
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
index 1654a530aa3d..4e51785e7606 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -101,7 +101,7 @@ static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
tp->snd_cwnd = pkts_in_flight + sndcnt;
}
-/* Decide wheather to run the increase function of congestion control. */
+/* Decide whether to run the increase function of congestion control. */
static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
if (tcp_sk(sk)->reordering > TCP_REORDERING)
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 7cd73e75f52a..32c511bcd60b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-/* WARNING: This implemenation is not necessarily the same
+/* WARNING: This implementation is not necessarily the same
* as the tcp_dctcp.c. The purpose is mainly for testing
* the kernel BPF logic.
*/
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index c1cfd297aabf..a7a1a684eed1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -33,7 +33,20 @@
* e.g. "foo{{[0-9]+}}" matches strings like "foo007".
* Extended POSIX regular expression syntax is allowed
* inside the brackets.
+ * __not_msg Message not expected to be found in verifier log.
+ * If __msg_not is situated between __msg tags
+ * framework matches __msg tags first, and then
+ * checks that __msg_not is not present in a portion of
+ * a log between bracketing __msg tags.
+ * Same regex syntax as for __msg is supported.
* __msg_unpriv Same as __msg but for unprivileged mode.
+ * __not_msg_unpriv Same as __not_msg but for unprivileged mode.
+ *
+ * __stderr Message expected to be found in bpf stderr stream. The
+ * same regex rules apply like __msg.
+ * __stderr_unpriv Same as __stderr but for unpriveleged mode.
+ * __stdout Same as __stderr but for stdout stream.
+ * __stdout_unpriv Same as __stdout but for unpriveleged mode.
*
* __xlated Expect a line in a disassembly log after verifier applies rewrites.
* Multiple __xlated attributes could be specified.
@@ -115,12 +128,14 @@
* __caps_unpriv Specify the capabilities that should be set when running the test.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
#define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
#define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
@@ -136,9 +151,14 @@
#define __arch_x86_64 __arch("X86_64")
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
+#define __arch_s390x __arch("s390x")
#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
+#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg)))
+#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
/* Define common capabilities tested using __caps_unpriv */
#define CAP_NET_ADMIN 12
@@ -156,6 +176,10 @@
#define __imm_ptr(name) [name]"r"(&name)
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
/* Magic constants used with __retval() */
#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
new file mode 100644
index 000000000000..f4e67b492dd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_TEST_UTILS_H__
+#define __BPF_TEST_UTILS_H__
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Clobber as many native registers and stack slots as possible. */
+static __always_inline void clobber_regs_stack(void)
+{
+ char tmp_str[] = "123456789";
+ unsigned long tmp;
+
+ bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp);
+ __sink(tmp);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
index 092db1d0435e..88e13e17ec9e 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
@@ -73,7 +73,7 @@ int BPF_PROG(use_css_iter_non_sleepable)
}
SEC("lsm.s/socket_connect")
-__failure __msg("expected an RCU CS")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock)
{
u64 cgrp_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
index 5354455a01be..02d8f160ca0e 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -221,3 +221,15 @@ int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
return 0;
}
+
+SEC("syscall")
+int test_cgrp_from_id_ns(void *ctx)
+{
+ struct cgroup *cg;
+
+ cg = bpf_cgroup_from_id(1);
+ if (!cg)
+ return 42;
+ bpf_cgroup_release(cg);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
index 645be6cddf36..dfd8a258f14a 100644
--- a/tools/testing/selftests/bpf/progs/crypto_sanity.c
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -14,7 +14,7 @@ unsigned char key[256] = {};
u16 udp_test_port = 7777;
u32 authsize, key_len;
char algo[128] = {};
-char dst[16] = {};
+char dst[16] = {}, dst_bad[8] = {};
int status;
static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc)
@@ -59,10 +59,9 @@ int skb_crypto_setup(void *ctx)
.authsize = authsize,
};
struct bpf_crypto_ctx *cctx;
- int err = 0;
+ int err;
status = 0;
-
if (key_len > 256) {
status = -EINVAL;
return 0;
@@ -70,8 +69,8 @@ int skb_crypto_setup(void *ctx)
__builtin_memcpy(&params.algo, algo, sizeof(algo));
__builtin_memcpy(&params.key, key, sizeof(key));
- cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
if (!cctx) {
status = err;
return 0;
@@ -80,7 +79,6 @@ int skb_crypto_setup(void *ctx)
err = crypto_ctx_insert(cctx);
if (err && err != -EEXIST)
status = err;
-
return 0;
}
@@ -92,6 +90,7 @@ int decrypt_sanity(struct __sk_buff *skb)
struct bpf_dynptr psrc, pdst;
int err;
+ status = 0;
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -110,13 +109,23 @@ int decrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
@@ -129,7 +138,6 @@ int encrypt_sanity(struct __sk_buff *skb)
int err;
status = 0;
-
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -148,13 +156,23 @@ int encrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Check also bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index bd8f15229f5c..dda6a8dada82 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
return SK_PASS;
}
+/* A metadata slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *(md + 1) = 42;
+
+ return SK_PASS;
+}
+
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
@@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb)
return SK_PASS;
}
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem")
+int skb_meta_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
@@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb)
return SK_PASS;
}
+/* Read-only skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *d;
+
+ return SK_PASS;
+}
+
+/* Read-write skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *d = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb data slice */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
@@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+/* Only supported prog type can create skb_meta-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed")
+int skb_meta_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr meta;
+
+ /* this should fail */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+
+ return 0;
+}
+
SEC("fentry/skb_tx_error")
__failure __msg("must be referenced or trusted")
int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
@@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb)
return 0;
}
+/* A skb clone's metadata slice becomes invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr clone, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ bpf_dynptr_clone(&meta, &clone);
+ md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return 0;
+}
+
/* A xdp clone's data slices should be invalid anytime packet data changes */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 8315273cb900..127dea342e5a 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -211,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb)
return 1;
}
+SEC("?tc")
+int test_dynptr_skb_meta_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ err = 2;
+ md = bpf_dynptr_data(&meta, 0, sizeof(*md));
+ if (md)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
+/* Check that skb metadata dynptr ops don't accept any flags. */
+SEC("?tc")
+int test_dynptr_skb_meta_flags(struct __sk_buff *skb)
+{
+ const __u64 INVALID_FLAGS = ~0ULL;
+ struct bpf_dynptr meta;
+ __u8 buf;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 2;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ err = 3;
+ ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 4;
+ ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
SEC("tp/syscalls/sys_enter_nanosleep")
int test_adjust(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index 5e0a1ca96d4e..a01c2736890f 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,43 +18,43 @@
return *(u64 *)num; \
}
-__msg(": R0_w=0xffffffff80000000")
+__msg(": R0=0xffffffff80000000")
check_assert(s64, ==, eq_int_min, INT_MIN);
-__msg(": R0_w=0x7fffffff")
+__msg(": R0=0x7fffffff")
check_assert(s64, ==, eq_int_max, INT_MAX);
-__msg(": R0_w=0")
+__msg(": R0=0")
check_assert(s64, ==, eq_zero, 0);
-__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000")
+__msg(": R0=0x8000000000000000 R1=0x8000000000000000")
check_assert(s64, ==, eq_llong_min, LLONG_MIN);
-__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff")
+__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff")
check_assert(s64, ==, eq_llong_max, LLONG_MAX);
-__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)")
+__msg(": R0=scalar(id=1,smax=0x7ffffffe)")
check_assert(s64, <, lt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, <, lt_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff")
+__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff")
check_assert(s64, <, lt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smax=0x7fffffff)")
+__msg(": R0=scalar(id=1,smax=0x7fffffff)")
check_assert(s64, <=, le_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=0)")
+__msg(": R0=scalar(id=1,smax=0)")
check_assert(s64, <=, le_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smax=0xffffffff80000000")
check_assert(s64, <=, le_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000001")
check_assert(s64, >, gt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000000")
check_assert(s64, >=, ge_neg, INT_MIN);
SEC("?tc")
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
index 544e5ac90461..d09bbd8ae8a8 100644
--- a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -12,7 +12,7 @@
SEC("freplace/connect_v4_prog")
int new_connect_v4_prog(struct bpf_sock_addr *ctx)
{
- // return value thats in invalid range
+ // return value that's in invalid range
return 255;
}
diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c
index f41257eadbb2..d273b46dfc7c 100644
--- a/tools/testing/selftests/bpf/progs/iters_state_safety.c
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@@ -30,7 +30,7 @@ int force_clang_to_emit_btf_for_externs(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int create_and_destroy(void *ctx)
{
struct bpf_iter_num iter;
@@ -196,7 +196,7 @@ int leak_iter_from_subprog_fail(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int valid_stack_reuse(void *ctx)
{
struct bpf_iter_num iter;
@@ -345,7 +345,7 @@ int __naked read_from_iter_slot_fail(void)
"r3 = 1000;"
"call %[bpf_iter_num_new];"
- /* attemp to leak bpf_iter_num state */
+ /* attempt to leak bpf_iter_num state */
"r7 = *(u64 *)(r6 + 0);"
"r8 = *(u64 *)(r6 + 8);"
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
index 6b1588d70652..fe3663dedbe1 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_failure.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -15,7 +15,7 @@ void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection")
int BPF_PROG(iter_tasks_without_lock)
{
struct task_struct *pos;
@@ -27,7 +27,7 @@ int BPF_PROG(iter_tasks_without_lock)
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(iter_css_without_lock)
{
u64 cg_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c
index 9e4b45201e69..5379e9960ffd 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod.c
@@ -123,3 +123,49 @@ out:
bpf_iter_num_destroy(&num_it);
return 0;
}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection")
+int iter_ret_rcu_test_protected(const void *ctx)
+{
+ struct task_struct *p;
+
+ p = bpf_kfunc_ret_rcu_test();
+ return p->pid;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=")
+int iter_ret_rcu_test_type(const void *ctx)
+{
+ struct task_struct *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test();
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection")
+int iter_ret_rcu_test_protected_nostruct(const void *ctx)
+{
+ void *p;
+
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ return *(int *)p;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=")
+int iter_ret_rcu_test_type_nostruct(const void *ctx)
+{
+ void *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
index 6543d5b6e0a9..83791348bed5 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@@ -20,7 +20,7 @@ __s64 res_empty;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_empty(const void *ctx)
@@ -38,7 +38,7 @@ __s64 res_full;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_full(const void *ctx)
@@ -58,7 +58,7 @@ static volatile int zero = 0;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_truncated(const void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
new file mode 100644
index 000000000000..f77aef0474d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+SEC("kprobe")
+int kprobe_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+
+SEC("kprobe.multi")
+int kprobe_multi_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index 6438982b928b..ddd26d1a083f 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -226,8 +226,7 @@ int obj_new_no_composite(void *ctx)
SEC("?tc")
int obj_new_no_struct(void *ctx)
{
-
- bpf_obj_new(union { int data; unsigned udata; });
+ (void)bpf_obj_new(union { int data; unsigned udata; });
return 0;
}
@@ -252,7 +251,7 @@ int new_null_ret(void *ctx)
SEC("?tc")
int obj_new_acq(void *ctx)
{
- bpf_obj_new(struct foo);
+ (void)bpf_obj_new(struct foo);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
index 50e66772c046..b0fa26fb4760 100644
--- a/tools/testing/selftests/bpf/progs/loop1.c
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
index 947bb7e988c2..0227409d4b0e 100644
--- a/tools/testing/selftests/bpf/progs/loop2.c
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 717dab14322b..5d1c9a775e6b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
index e4ff97fbcce1..dd36aff4fba3 100644
--- a/tools/testing/selftests/bpf/progs/loop6.c
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/ptrace.h>
-#include <stddef.h>
-#include <linux/bpf.h>
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -26,12 +25,6 @@ char _license[] SEC("license") = "GPL";
#define SG_CHAIN 0x01UL
#define SG_END 0x02UL
-struct scatterlist {
- unsigned long page_link;
- unsigned int offset;
- unsigned int length;
-};
-
#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
#define sg_is_last(sg) ((sg)->page_link & SG_END)
#define sg_chain_ptr(sg) \
@@ -62,7 +55,7 @@ static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
return sgp;
}
-int config = 0;
+int run_once = 0;
int result = 0;
SEC("kprobe/virtqueue_add_sgs")
@@ -73,14 +66,14 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__u64 length1 = 0, length2 = 0;
unsigned int i, n, len;
- if (config != 0)
+ if (run_once != 0)
return 0;
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
__sink(out_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length1 += len;
n++;
}
@@ -90,13 +83,13 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__sink(in_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length2 += len;
n++;
}
}
- config = 1;
+ run_once = 1;
result = length2 - length1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie.h b/tools/testing/selftests/bpf/progs/lpm_trie.h
new file mode 100644
index 000000000000..76aa5821807f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_LPM_TRIE_H
+#define __PROGS_LPM_TRIE_H
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+/* Benchmark operations */
+enum {
+ LPM_OP_NOOP = 0,
+ LPM_OP_BASELINE,
+ LPM_OP_LOOKUP,
+ LPM_OP_INSERT,
+ LPM_OP_UPDATE,
+ LPM_OP_DELETE,
+ LPM_OP_FREE
+};
+
+/*
+ * Return values from run_bench.
+ *
+ * Negative values are also allowed and represent kernel error codes.
+ */
+#define LPM_BENCH_SUCCESS 0
+#define LPM_BENCH_REINIT_MAP 1 /* Reset trie to initial state for current op */
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
new file mode 100644
index 000000000000..a0e6ebd5507a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_atomic.h"
+#include "progs/lpm_trie.h"
+
+#define BPF_OBJ_NAME_LEN 16U
+#define MAX_ENTRIES 100000000
+#define NR_LOOPS 10000
+
+char _license[] SEC("license") = "GPL";
+
+/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_map SEC(".maps");
+
+long hits;
+long duration_ns;
+
+/* Configured from userspace */
+__u32 nr_entries;
+__u32 prefixlen;
+bool random;
+__u8 op;
+
+static __u64 latency_free_start;
+
+SEC("fentry/bpf_map_free_deferred")
+int BPF_PROG(trie_free_entry, struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+ char name[BPF_OBJ_NAME_LEN];
+ u32 map_type;
+
+ map_type = BPF_CORE_READ(map, map_type);
+ if (map_type != BPF_MAP_TYPE_LPM_TRIE)
+ return 0;
+
+ /*
+ * Ideally we'd have access to the map ID but that's already
+ * freed before we enter trie_free().
+ */
+ BPF_CORE_READ_STR_INTO(&name, map, name);
+ if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map"))
+ return 0;
+
+ latency_free_start = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+SEC("fexit/bpf_map_free_deferred")
+int BPF_PROG(trie_free_exit, struct work_struct *work)
+{
+ __u64 val;
+
+ if (!latency_free_start)
+ return 0;
+
+ val = bpf_ktime_get_ns() - latency_free_start;
+ latency_free_start = 0;
+
+ __sync_add_and_fetch(&duration_ns, val);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+static __u32 cur_key;
+
+static __always_inline void generate_key(struct trie_key *key)
+{
+ key->prefixlen = prefixlen;
+
+ if (random)
+ key->data = bpf_get_prandom_u32() % nr_entries;
+ else
+ key->data = cur_key++ % nr_entries;
+}
+
+static int noop(__u32 index, __u32 *unused)
+{
+ return 0;
+}
+
+static int baseline(__u32 index, __u32 *unused)
+{
+ struct trie_key key;
+ __u32 blackbox = 0;
+
+ generate_key(&key);
+ /* Avoid compiler optimizing out the modulo */
+ barrier_var(blackbox);
+ blackbox = READ_ONCE(key.data);
+
+ return 0;
+}
+
+static int lookup(__u32 index, int *retval)
+{
+ struct trie_key key;
+
+ generate_key(&key);
+ if (!bpf_map_lookup_elem(&trie_map, &key)) {
+ *retval = -ENOENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Is this the last entry? */
+ if (key.data == nr_entries - 1) {
+ /* For atomicity concerns, see the comment in delete() */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int update(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int delete(__u32 index, int *retval)
+{
+ struct trie_key key;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_delete_elem(&trie_map, &key);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Do we need to refill the map? */
+ if (key.data == nr_entries - 1) {
+ /*
+ * Atomicity isn't required because DELETE only supports
+ * one producer running concurrently. What we need is a
+ * way to track how many entries have been deleted from
+ * the trie between consecutive invocations of the BPF
+ * prog because a single bpf_loop() call might not
+ * delete all entries, e.g. when NR_LOOPS < nr_entries.
+ */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("xdp")
+int BPF_PROG(run_bench)
+{
+ int err = LPM_BENCH_SUCCESS;
+ u64 start, delta;
+ int loops;
+
+ start = bpf_ktime_get_ns();
+
+ switch (op) {
+ case LPM_OP_NOOP:
+ loops = bpf_loop(NR_LOOPS, noop, NULL, 0);
+ break;
+ case LPM_OP_BASELINE:
+ loops = bpf_loop(NR_LOOPS, baseline, NULL, 0);
+ break;
+ case LPM_OP_LOOKUP:
+ loops = bpf_loop(NR_LOOPS, lookup, &err, 0);
+ break;
+ case LPM_OP_INSERT:
+ loops = bpf_loop(NR_LOOPS, insert, &err, 0);
+ break;
+ case LPM_OP_UPDATE:
+ loops = bpf_loop(NR_LOOPS, update, &err, 0);
+ break;
+ case LPM_OP_DELETE:
+ loops = bpf_loop(NR_LOOPS, delete, &err, 0);
+ break;
+ default:
+ bpf_printk("invalid benchmark operation\n");
+ return -1;
+ }
+
+ delta = bpf_ktime_get_ns() - start;
+
+ __sync_add_and_fetch(&duration_ns, delta);
+ __sync_add_and_fetch(&hits, loops);
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_map.c b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
new file mode 100644
index 000000000000..6e60d686b664
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_ENTRIES 100000000
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_free_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/map_excl.c b/tools/testing/selftests/bpf/progs/map_excl.c
new file mode 100644
index 000000000000..d461684728e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_excl.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} excl_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_not_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
index 4f94c971ae86..3b984b6ae7c0 100644
--- a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -8,8 +8,8 @@
SEC("tp_btf/sys_enter")
__success
__log_level(2)
-__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)")
-__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar")
+__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
+__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar")
int btf_id_to_ptr_mem(void *ctx)
{
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
index 098ef970fac1..b05565d1db0d 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_search.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -183,7 +183,7 @@ long test_##op##_spinlock_##dolock(void *ctx) \
}
/*
- * Use a spearate MSG macro instead of passing to TEST_XXX(..., MSG)
+ * Use a separate MSG macro instead of passing to TEST_XXX(..., MSG)
* to ensure the message itself is not in the bpf prog lineinfo
* which the verifier includes in its log.
* Otherwise, the test_loader will incorrectly match the prog lineinfo
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c
index 47568007b668..0c77df05be7f 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c
@@ -50,6 +50,7 @@ struct sched_switch_args {
int next_prio;
};
+__u32 stack_id;
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
@@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx)
/* The size of stackmap and stackid_hmap should be the same */
key = bpf_get_stackid(ctx, &stackmap, 0);
if ((int)key >= 0) {
+ stack_id = key;
bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
stack_p = bpf_map_lookup_elem(&stack_amap, &key);
if (stack_p)
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
index 35790897dc87..4a5bd852f10c 100644
--- a/tools/testing/selftests/bpf/progs/stream.c
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
struct arr_elem {
struct bpf_res_spin_lock lock;
@@ -17,10 +18,29 @@ struct {
__type(value, struct arr_elem);
} arrmap SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1); /* number of pages */
+} arena SEC(".maps");
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
#define ENOSPC 28
#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
int size;
+u64 fault_addr;
+void *arena_ptr;
SEC("syscall")
__success __retval(0)
@@ -37,7 +57,15 @@ int stream_exhaust(void *ctx)
}
SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__arch_s390x
__success __retval(0)
+__stderr("ERROR: Timeout detected for may_goto instruction")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
int stream_cond_break(void *ctx)
{
while (can_loop)
@@ -47,6 +75,15 @@ int stream_cond_break(void *ctx)
SEC("syscall")
__success __retval(0)
+__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock")
+__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n"
+"Total held locks = 1\n"
+"Held lock\\[ 0\\] = \\1}}")
+__stderr("...")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
int stream_deadlock(void *ctx)
{
struct bpf_res_spin_lock *lock, *nlock;
@@ -76,4 +113,125 @@ int stream_syscall(void *ctx)
return 0;
}
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_write_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r2 = 1;"
+ "*(u32 *)(r1 + 0x7fff) = r2;"
+ :
+ : "r" (user_vm_start)
+ : "r1", "r2"
+ );
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_read_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+ * bounds analysis
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r1 = *(u32 *)(r1 + 0x7fff);"
+ :
+ : "r" (user_vm_start)
+ : "r1"
+ );
+ return 0;
+}
+
+static __noinline void subprog(void)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_subprog_fault(void *ctx)
+{
+ subprog();
+ return 0;
+}
+
+static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_callback_fault(void *ctx)
+{
+ struct bpf_timer *arr_timer;
+
+ arr_timer = bpf_map_lookup_elem(&array, &(int){0});
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, 1);
+ bpf_timer_set_callback(arr_timer, timer_cb);
+ bpf_timer_start(arr_timer, 0, 0);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
index 53af438bd998..99d72c68f76a 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -31,6 +31,8 @@ char *invalid_kern_ptr = (char *)-1;
/* Passing NULL to string kfuncs (treated as a userspace ptr) */
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
@@ -49,6 +51,8 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return
/* Passing userspace ptr to string kfuncs */
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
@@ -69,6 +73,8 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { re
/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */
SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); }
SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); }
SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
index 89fb4669b0e9..e41cc5601994 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -7,6 +7,7 @@
char long_str[XATTR_SIZE_MAX + 1];
SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
+SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); }
SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
index 46697f381878..2e3498e37b9c 100644
--- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -12,6 +12,11 @@ char str[] = "hello world";
/* Functional tests */
__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); }
__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); }
+__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); }
+__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); }
+__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); }
+__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); }
+__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); }
__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
@@ -30,8 +35,12 @@ __test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
-__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); }
-__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); }
+__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); }
+__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); }
+__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); }
+__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); }
__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
new file mode 100644
index 000000000000..ad8bb546c9bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP1_MAGIC 1234
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP1_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
new file mode 100644
index 000000000000..cea1a2f4b62f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP2_MAGIC 4567
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP2_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
index 36386b3c23a1..2b98b7710816 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
@@ -9,7 +9,7 @@ void bpf_task_release(struct task_struct *p) __ksym;
/* This test struct_ops BPF programs returning referenced kptr. The verifier should
* allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to task
- * here is acquried automatically as the task argument is tagged with "__ref".
+ * here is acquired automatically as the task argument is tagged with "__ref".
*/
SEC("struct_ops/test_return_ref_kptr")
struct task_struct *BPF_PROG(kptr_return, int dummy,
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
index 76dcb6089d7f..9c0a65466356 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
@@ -9,7 +9,7 @@ __attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __k
/* This is a test BPF program that uses struct_ops to access a referenced
* kptr argument. This is a test for the verifier to ensure that it
- * 1) recongnizes the task as a referenced object (i.e., ref_obj_id > 0), and
+ * 1) recognizes the task as a referenced object (i.e., ref_obj_id > 0), and
* 2) the same reference can be acquired from multiple paths as long as it
* has not been released.
*/
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
index 327ca395e860..d556b19413d7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int entry(struct __sk_buff *skb)
{
int ret = 1;
+ clobber_regs_stack();
+
count++;
subprog_tail(skb);
subprog_tail(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
index 72fd0d577506..ae94c9c70ab7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
int classifier_1(struct __sk_buff *skb);
@@ -60,6 +61,8 @@ int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
subprog_tail0(skb);
subprog_tail1(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
index a7fb91cb05b7..56b6b0099840 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
@@ -53,6 +54,8 @@ int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
bpf_tail_call_static(skb, &jmp_table0, 0);
__sink(ret);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
index c87f9ca982d3..5261395713cd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int subprog_tail(void *ctx)
SEC("fentry/dummy")
int BPF_PROG(fentry, struct sk_buff *skb)
{
+ clobber_regs_stack();
+
count++;
subprog_tail(ctx);
subprog_tail(ctx);
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
new file mode 100644
index 000000000000..432fff2af844
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_BPF_H
+#define __TASK_LOCAL_DATA_BPF_H
+
+/*
+ * Task local data is a library that facilitates sharing per-task data
+ * between user space and bpf programs.
+ *
+ *
+ * USAGE
+ *
+ * A TLD, an entry of data in task local data, first needs to be created by the
+ * user space. This is done by calling user space API, TLD_DEFINE_KEY() or
+ * tld_create_key(), with the name of the TLD and the size.
+ *
+ * TLD_DEFINE_KEY(prio, "priority", sizeof(int));
+ *
+ * or
+ *
+ * void func_call(...) {
+ * tld_key_t prio, in_cs;
+ *
+ * prio = tld_create_key("priority", sizeof(int));
+ * in_cs = tld_create_key("in_critical_section", sizeof(bool));
+ * ...
+ *
+ * A key associated with the TLD, which has an opaque type tld_key_t, will be
+ * initialized or returned. It can be used to get a pointer to the TLD in the
+ * user space by calling tld_get_data().
+ *
+ * In a bpf program, tld_object_init() first needs to be called to initialized a
+ * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data().
+ * The API will try to fetch the key by the name and use it to locate the data.
+ * A pointer to the TLD will be returned. It also caches the key in a task local
+ * storage map, tld_key_map, whose value type, struct tld_keys, must be defined
+ * by the developer.
+ *
+ * struct tld_keys {
+ * tld_key_t prio;
+ * tld_key_t in_cs;
+ * };
+ *
+ * SEC("struct_ops")
+ * void prog(struct task_struct task, ...)
+ * {
+ * struct tld_object tld_obj;
+ * int err, *p;
+ *
+ * err = tld_object_init(task, &tld_obj);
+ * if (err)
+ * return;
+ *
+ * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int));
+ * if (p)
+ * // do something depending on *p
+ */
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifndef TLD_KEY_MAP_CREATE_RETRY
+#define TLD_KEY_MAP_CREATE_RETRY 10
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ __u16 size;
+};
+
+struct tld_meta_u {
+ __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[TLD_MAX_DATA_CNT];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[__PAGE_SIZE - sizeof(__u64)];
+};
+
+struct tld_map_value {
+ struct tld_data_u __uptr *data;
+ struct tld_meta_u __uptr *meta;
+};
+
+typedef struct tld_uptr_dummy {
+ struct tld_data_u data[0];
+ struct tld_meta_u meta[0];
+} *tld_uptr_dummy_t;
+
+struct tld_object {
+ struct tld_map_value *data_map;
+ struct tld_keys *key_map;
+ /*
+ * Force the compiler to generate the actual definition of tld_meta_u
+ * and tld_data_u in BTF. Without it, tld_meta_u and u_tld_data will
+ * be BTF_KIND_FWD.
+ */
+ tld_uptr_dummy_t dummy[0];
+};
+
+/*
+ * Map value of tld_key_map for caching keys. Must be defined by the developer.
+ * Members should be tld_key_t and passed to the 3rd argument of tld_fetch_key().
+ */
+struct tld_keys;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_map_value);
+} tld_data_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_keys);
+} tld_key_map SEC(".maps");
+
+/**
+ * tld_object_init() - Initialize a tld_object.
+ *
+ * @task: The task_struct of the target task
+ * @tld_obj: A pointer to a tld_object to be initialized
+ *
+ * Return 0 on success; -ENODATA if the user space did not initialize task local data
+ * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map
+ * fails
+ */
+__attribute__((unused))
+static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj)
+{
+ int i;
+
+ tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0);
+ if (!tld_obj->data_map)
+ return -ENODATA;
+
+ bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) {
+ tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (likely(tld_obj->key_map))
+ break;
+ }
+ if (!tld_obj->key_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Return the offset of TLD if @name is found. Otherwise, return the current TLD count
+ * using the nonpositive range so that the next tld_get_data() can skip fetching key if
+ * no new TLD is added or start comparing name from the first newly added TLD.
+ */
+__attribute__((unused))
+static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start)
+{
+ struct tld_metadata *metadata;
+ int i, cnt, start, off = 0;
+
+ if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta)
+ return 0;
+
+ start = tld_obj->data_map->data->start;
+ cnt = tld_obj->data_map->meta->cnt;
+ metadata = tld_obj->data_map->meta->metadata;
+
+ bpf_for(i, 0, cnt) {
+ if (i >= TLD_MAX_DATA_CNT)
+ break;
+
+ if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name))
+ return start + off;
+
+ off += TLD_ROUND_UP(metadata[i].size, 8);
+ }
+
+ return -cnt;
+}
+
+/**
+ * tld_get_data() - Retrieve a pointer to the TLD associated with the name.
+ *
+ * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init()
+ * @key: The cached key of the TLD in tld_key_map
+ * @name: The name of the key associated with a TLD
+ * @size: The size of the TLD. Must be a known constant value
+ *
+ * Return a pointer to the TLD associated with @name; NULL if not found or @size is too
+ * big. @key is used to cache the key if the TLD is found to speed up subsequent calls.
+ * It should be defined as an member of tld_keys of tld_key_t type by the developer.
+ */
+#define tld_get_data(tld_obj, key, name, size) \
+ ({ \
+ void *data = NULL, *_data = (tld_obj)->data_map->data; \
+ long off = (tld_obj)->key_map->key.off; \
+ int cnt; \
+ \
+ if (likely(_data)) { \
+ if (likely(off > 0)) { \
+ barrier_var(off); \
+ if (likely(off < __PAGE_SIZE - size)) \
+ data = _data + off; \
+ } else { \
+ cnt = -(off); \
+ if (likely((tld_obj)->data_map->meta) && \
+ cnt < (tld_obj)->data_map->meta->cnt) { \
+ off = __tld_fetch_key(tld_obj, name, cnt); \
+ (tld_obj)->key_map->key.off = off; \
+ \
+ if (likely(off < __PAGE_SIZE - size)) { \
+ barrier_var(off); \
+ if (off > 0) \
+ data = _data + off; \
+ } \
+ } \
+ } \
+ } \
+ data; \
+ })
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
new file mode 100644
index 000000000000..23217f06a3ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} lrumap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+int oncpu_hash_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ err = bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_array_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_lru_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (work)
+ return 0;
+ err = bpf_map_update_elem(&lrumap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (!work || work->data[0])
+ return 0;
+ bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
new file mode 100644
index 000000000000..77fe8f28facd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+__failure __msg("doesn't match map pointer in R3")
+int mismatch_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("arg#1 doesn't point to a map value")
+int no_map_task_work(struct pt_regs *args)
+{
+ struct task_struct *task;
+ struct bpf_task_work tw;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int task_work_null(struct pt_regs *args)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg2")
+int map_null(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
new file mode 100644
index 000000000000..90fca06fff56
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+#define ENTRIES 128
+
+char _license[] SEC("license") = "GPL";
+
+__u64 callback_scheduled = 0;
+__u64 callback_success = 0;
+__u64 schedule_error = 0;
+__u64 delete_success = 0;
+
+struct elem {
+ __u32 count;
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, ENTRIES);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ __sync_fetch_and_add(&callback_success, 1);
+ return 0;
+}
+
+SEC("syscall")
+int schedule_task_work(void *ctx)
+{
+ struct elem empty_work = {.count = 0};
+ struct elem *work;
+ int key = 0, err;
+
+ key = bpf_ktime_get_ns() % ENTRIES;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work) {
+ bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+ }
+ err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work, NULL);
+ if (err)
+ __sync_fetch_and_add(&schedule_error, 1);
+ else
+ __sync_fetch_and_add(&callback_scheduled, 1);
+ return 0;
+}
+
+SEC("syscall")
+int delete_task_work(void *ctx)
+{
+ int key = 0, err;
+
+ key = bpf_get_prandom_u32() % ENTRIES;
+ err = bpf_map_delete_elem(&hmap, &key);
+ if (!err)
+ __sync_fetch_and_add(&delete_success, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f344c6835e84..26a53e54b8fa 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -22,6 +22,7 @@
#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#include "bpf_misc.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -31,9 +32,6 @@
#define INLINING __always_inline
#endif
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
-
#define IP_OFFSET_MASK (0x1FFF)
#define IP_MF (0x2000)
@@ -129,7 +127,7 @@ typedef uint8_t *net_ptr __attribute__((align_value(8)));
typedef struct buf {
struct __sk_buff *skb;
net_ptr head;
- /* NB: tail musn't have alignment other than 1, otherwise
+ /* NB: tail mustn't have alignment other than 1, otherwise
* LLVM will go and eliminate code, e.g. when checking packet lengths.
*/
uint8_t *const tail;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index d0f7670351e5..dfd4a2710391 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -494,7 +494,7 @@ static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_header
*offset += sizeof(*next_hop);
- /* Skip the remainig next hops (may be zero). */
+ /* Skip the remaining next hops (may be zero). */
return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index abb7344b531f..5edf3cdc213d 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-#include <stdbool.h>
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
new file mode 100644
index 000000000000..c855f8f87eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap2 SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c
new file mode 100644
index 000000000000..fffafc013044
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_local_data.bpf.h"
+
+struct tld_keys {
+ tld_key_t value0;
+ tld_key_t value1;
+ tld_key_t value2;
+ tld_key_t value_not_exist;
+};
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+int test_value0;
+int test_value1;
+struct test_tld_struct test_value2;
+
+SEC("syscall")
+int task_main(void *ctx)
+{
+ struct tld_object tld_obj;
+ struct test_tld_struct *struct_p;
+ struct task_struct *task;
+ int err, *int_p;
+
+ task = bpf_get_current_task_btf();
+ err = tld_object_init(task, &tld_obj);
+ if (err)
+ return 1;
+
+ int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int));
+ if (int_p)
+ test_value0 = *int_p;
+ else
+ return 2;
+
+ int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int));
+ if (int_p)
+ test_value1 = *int_p;
+ else
+ return 3;
+
+ struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct));
+ if (struct_p)
+ test_value2 = *struct_p;
+ else
+ return 4;
+
+ int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int));
+ if (int_p)
+ return 5;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
index 5f4e87ee949a..1ecdf4c54de4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -14,10 +14,7 @@
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"
-
-#ifndef sizeof_field
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#endif
+#include "bpf_misc.h"
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index 540181c115a8..ef00d38b0a8d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -23,7 +23,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(max_entries, 2);
__type(key, int);
__type(value, __u32);
} perf_event_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
index 896c88a4960d..12f4065fca20 100644
--- a/tools/testing/selftests/bpf/progs/test_uprobe.c
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -59,3 +59,41 @@ int BPF_UPROBE(test4)
test4_result = 1;
return 0;
}
+
+#if defined(__TARGET_ARCH_x86)
+struct pt_regs regs;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ax = regs.ax;
+ ctx->cx = regs.cx;
+ ctx->dx = regs.dx;
+ ctx->r8 = regs.r8;
+ ctx->r9 = regs.r9;
+ ctx->r10 = regs.r10;
+ ctx->r11 = regs.r11;
+ ctx->di = regs.di;
+ ctx->si = regs.si;
+ return 0;
+}
+
+unsigned long ip;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change_ip)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ip = ip;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index 096488f47fbc..a78c87537b07 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -107,4 +107,35 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
return 0;
}
+int usdt_sib_called;
+u64 usdt_sib_cookie;
+int usdt_sib_arg_cnt;
+int usdt_sib_arg_ret;
+short usdt_sib_arg;
+int usdt_sib_arg_size;
+
+/*
+ * usdt_sib is only tested on x86-related architectures, so it requires
+ * manual attach since auto-attach will panic tests under other architectures
+ */
+SEC("usdt")
+int usdt_sib(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_sib_called, 1);
+
+ usdt_sib_cookie = bpf_usdt_cookie(ctx);
+ usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt_sib_arg = (short)tmp;
+ usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
new file mode 100644
index 000000000000..814e2a980e97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+SEC("xdp")
+int xdp_devmap(struct xdp_md *ctx)
+{
+ return ctx->egress_ifindex;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} xdp_map SEC(".maps") = {
+ .values = {
+ [0] = (void *)&xdp_devmap,
+ },
+};
+
+SEC("xdp")
+int xdp_entry(struct xdp_md *ctx)
+{
+ bpf_tail_call(ctx, &xdp_map, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index fcf6ca14f2ea..d79cb74b571e 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,8 +1,11 @@
+#include <stdbool.h>
#include <linux/bpf.h>
+#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_kfuncs.h"
#define META_SIZE 32
@@ -23,6 +26,8 @@ struct {
__uint(value_size, META_SIZE);
} test_result SEC(".maps");
+bool test_pass;
+
SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
@@ -40,6 +45,231 @@ int ing_cls(struct __sk_buff *ctx)
return TC_ACT_SHOT;
}
+/* Read from metadata using bpf_dynptr_read helper */
+SEC("tc")
+int ing_cls_dynptr_read(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 zero = 0;
+ __u8 *dst;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using bpf_dynptr_write helper */
+SEC("tc")
+int ing_cls_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read from metadata using read-only dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 zero = 0;
+ __u8 *dst, *src;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using writeable dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src, *dst;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read skb metadata in chunks from various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ const __u32 chunk_len = META_SIZE / 4;
+ const __u32 zero = 0;
+ __u8 *dst, *src;
+
+ dst = bpf_map_lookup_elem(&test_result, &zero);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ /* 1. Regular read */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 2. Read from an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 3. Read at an offset */
+ bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
+ dst += chunk_len;
+
+ /* 4. Read from a slice starting at an offset */
+ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!src)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_SHOT;
+}
+
+/* Write skb metadata in chunks at various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 payload[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
+ src = payload;
+
+ /* 1. Regular write */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 2. Write to an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 3. Write at an offset */
+ bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 4. Write to a slice starting at an offset */
+ dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!dst)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Pass an OOB offset to dynptr read, write, adjust, slice. */
+SEC("tc")
+int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 md, *p;
+ int err;
+
+ err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (err)
+ goto fail;
+
+ /* read offset OOB */
+ err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* write offset OOB */
+ err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* adjust end offset OOB */
+ err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* adjust start offset OOB */
+ err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* slice offset OOB */
+ p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ /* slice rdwr offset OOB */
+ p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ return TC_ACT_UNSPEC;
+fail:
+ return TC_ACT_SHOT;
+}
+
+/* Reserve and clear space for metadata but don't populate it */
+SEC("xdp")
+int ing_xdp_zalloc_meta(struct xdp_md *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *meta;
+ int ret;
+
+ /* Drop any non-test packets */
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ return XDP_DROP;
+ if (eth->h_proto != 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
+ if (ret < 0)
+ return XDP_DROP;
+
+ meta = ctx_ptr(ctx, data_meta);
+ if (meta + META_SIZE > ctx_ptr(ctx, data))
+ return XDP_DROP;
+
+ __builtin_memset(meta, 0, META_SIZE);
+
+ return XDP_PASS;
+}
+
SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
@@ -73,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx)
return XDP_PASS;
}
+/*
+ * Check that skb->data_meta..skb->data is empty if prog writes to packet
+ * _payload_ using packet pointers. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ if (ctx->data_meta != ctx->data)
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb->data_meta..skb->data is empty if prog writes to packet
+ * _metadata_ using packet pointers. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *md = ctx_ptr(ctx, data_meta);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ if (md + 1 > ctx_ptr(ctx, data)) {
+ /* Expect no metadata */
+ test_pass = true;
+ } else {
+ /* Metadata write to trigger unclone in prologue */
+ *md = 42;
+ }
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is writable but empty if prog writes to packet
+ * _payload_ using a dynptr slice. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is writable but empty if prog writes to packet
+ * _metadata_ using a dynptr slice. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect no metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
+ goto out;
+
+ /* Metadata write to trigger unclone in prologue */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (md)
+ *md = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is read-only before prog writes to packet payload
+ * using dynptr_write helper. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect read-only metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ goto out;
+
+ /* Helper write to payload will unclone the packet */
+ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
+
+ /* Expect no metadata after unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that skb_meta dynptr is read-only if prog writes to packet
+ * metadata using dynptr_write helper. Applies only to cloned skbs.
+ */
+SEC("tc")
+int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (eth->h_proto != 0)
+ goto out;
+
+ /* Expect read-only metadata */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
+ goto out;
+
+ /* Metadata write. Expect failure. */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
new file mode 100644
index 000000000000..c41a21413eaa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+int xdpf_sz;
+int sinfo_sz;
+int data_len;
+int pull_len;
+
+#define XDP_PACKET_HEADROOM 256
+
+SEC("xdp.frags")
+int xdp_find_sizes(struct xdp_md *ctx)
+{
+ xdpf_sz = sizeof(struct xdp_frame);
+ sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM -
+ (ctx->data_end - ctx->data);
+
+ return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_pull_data_prog(struct xdp_md *ctx)
+{
+ __u8 *data_end = (void *)(long)ctx->data_end;
+ __u8 *data = (void *)(long)ctx->data;
+ __u8 *val_p;
+ int err;
+
+ if (data_len != data_end - data)
+ return XDP_DROP;
+
+ err = bpf_xdp_pull_data(ctx, pull_len);
+ if (err)
+ return XDP_DROP;
+
+ val_p = (void *)(long)ctx->data + 1024;
+ if (val_p + 1 > (void *)(long)ctx->data_end)
+ return XDP_DROP;
+
+ if (*val_p != 0xbb)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer_interrupt.c b/tools/testing/selftests/bpf/progs/timer_interrupt.c
new file mode 100644
index 000000000000..19180a455f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_interrupt.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define CLOCK_MONOTONIC 1
+
+int preempt_count;
+int in_interrupt;
+int in_interrupt_cb;
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+static int timer_in_interrupt(void *map, int *key, struct bpf_timer *timer)
+{
+ preempt_count = get_preempt_count();
+ in_interrupt_cb = bpf_in_interrupt();
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_timer_interrupt)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&array, &key);
+ if (!timer)
+ return 0;
+
+ in_interrupt = bpf_in_interrupt();
+ bpf_timer_init(timer, &array, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_in_interrupt);
+ bpf_timer_start(timer, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
index c435a3a8328a..d460732e2023 100644
--- a/tools/testing/selftests/bpf/progs/tracing_struct.c
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -18,6 +18,18 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
@@ -26,6 +38,9 @@ long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
long t5_ret;
int t6;
+long ut1_a_a, ut1_b, ut1_c;
+long ut2_a, ut2_b_a, ut2_b_b;
+
SEC("fentry/bpf_testmod_test_struct_arg_1")
int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
{
@@ -130,4 +145,22 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
return 0;
}
+SEC("fexit/bpf_testmod_test_union_arg_1")
+int BPF_PROG2(test_union_arg_1, union bpf_testmod_union_arg_1, a, int, b, int, c)
+{
+ ut1_a_a = a.arg.a;
+ ut1_b = b;
+ ut1_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_union_arg_2")
+int BPF_PROG2(test_union_arg_2, int, a, union bpf_testmod_union_arg_2, b)
+{
+ ut2_a = a;
+ ut2_b_a = b.arg.a;
+ ut2_b_b = b.arg.b;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 044a6d78923e..3d5f30c29ae3 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -97,6 +97,12 @@ int bench_trigger_kprobe_multi(void *ctx)
return 0;
}
+SEC("?kprobe.multi/bpf_get_numa_node_id")
+int bench_kprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
@@ -104,6 +110,12 @@ int bench_trigger_kretprobe_multi(void *ctx)
return 0;
}
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
+int bench_kretprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
index 8a4fa6c7ef59..e08c31669e5a 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
@@ -7,8 +7,8 @@ struct pt_regs regs;
char _license[] SEC("license") = "GPL";
-SEC("uretprobe//proc/self/exe:uretprobe_regs_trigger")
-int uretprobe(struct pt_regs *ctx)
+SEC("uprobe")
+int probe(struct pt_regs *ctx)
{
__builtin_memcpy(&regs, ctx, sizeof(regs));
return 0;
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
index 0d7f1a7db2e2..915d38591bf6 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
#include <string.h>
struct pt_regs regs;
@@ -8,10 +10,64 @@ struct pt_regs regs;
char _license[] SEC("license") = "GPL";
int executed = 0;
+int pid;
+
+SEC("uprobe")
+int BPF_UPROBE(test_uprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uretprobe")
+int BPF_URETPROBE(test_uretprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.multi")
+int test_uprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
SEC("uretprobe.multi")
-int test(struct pt_regs *regs)
+int test_uretprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.session")
+int test_uprobe_session(struct pt_regs *ctx)
{
- executed = 1;
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("usdt")
+int test_usdt(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
index 9fdcf396b8f4..a2951e2f1711 100644
--- a/tools/testing/selftests/bpf/progs/uretprobe_stack.c
+++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
@@ -26,8 +26,8 @@ int usdt_len;
SEC("uprobe//proc/self/exe:target_1")
int BPF_UPROBE(uprobe_1)
{
- /* target_1 is recursive wit depth of 2, so we capture two separate
- * stack traces, depending on which occurence it is
+ /* target_1 is recursive with depth of 2, so we capture two separate
+ * stack traces, depending on which occurrence it is
*/
static bool recur = false;
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index 9dbdf123542d..f19e15400b3e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -240,6 +240,7 @@ int big_alloc2(void *ctx)
return 5;
bpf_arena_free_pages(&arena, (void __arena *)pg, 2);
page[i] = NULL;
+ barrier();
page[i + 1] = NULL;
cond_break;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 87a2c60d86e6..0a72e0228ea9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -926,7 +926,7 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for non const xor src dst")
__success __log_level(2)
-__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (af) r0 ^= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_xor_src_dst(void)
{
asm volatile (" \
@@ -947,7 +947,7 @@ __naked void non_const_xor_src_dst(void)
SEC("socket")
__description("bounds check for non const or src dst")
__success __log_level(2)
-__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (4f) r0 |= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_or_src_dst(void)
{
asm volatile (" \
@@ -968,7 +968,7 @@ __naked void non_const_or_src_dst(void)
SEC("socket")
__description("bounds check for non const mul regs")
__success __log_level(2)
-__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
+__msg("5: (2f) r0 *= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
__naked void non_const_mul_regs(void)
{
asm volatile (" \
@@ -1241,7 +1241,7 @@ l0_%=: r0 = 0; \
SEC("tc")
__description("multiply mixed sign bounds. test 1")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
__naked void mult_mixed0_sign(void)
{
asm volatile (
@@ -1264,7 +1264,7 @@ __naked void mult_mixed0_sign(void)
SEC("tc")
__description("multiply mixed sign bounds. test 2")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=smin32=-100,smax=smax32=200)")
__naked void mult_mixed1_sign(void)
{
asm volatile (
@@ -1287,7 +1287,7 @@ __naked void mult_mixed1_sign(void)
SEC("tc")
__description("multiply negative bounds")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
__naked void mult_sign_bounds(void)
{
asm volatile (
@@ -1311,7 +1311,7 @@ __naked void mult_sign_bounds(void)
SEC("tc")
__description("multiply bounds that don't cross signed boundary")
__success __log_level(2)
-__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
+__msg("r8 *= r6 {{.*}}; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
__naked void mult_no_sign_crossing(void)
{
asm volatile (
@@ -1331,7 +1331,7 @@ __naked void mult_no_sign_crossing(void)
SEC("tc")
__description("multiplication overflow, result in unbounded reg. test 1")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
__naked void mult_unsign_ovf(void)
{
asm volatile (
@@ -1353,7 +1353,7 @@ __naked void mult_unsign_ovf(void)
SEC("tc")
__description("multiplication overflow, result in unbounded reg. test 2")
__success __log_level(2)
-__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
__naked void mult_sign_ovf(void)
{
asm volatile (
@@ -1376,7 +1376,7 @@ __naked void mult_sign_ovf(void)
SEC("socket")
__description("64-bit addition, all outcomes overflow")
__success __log_level(2)
-__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
+__msg("5: (0f) r3 += r3 {{.*}} R3=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
__retval(0)
__naked void add64_full_overflow(void)
{
@@ -1396,7 +1396,7 @@ __naked void add64_full_overflow(void)
SEC("socket")
__description("64-bit addition, partial overflow, result in unbounded reg")
__success __log_level(2)
-__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()")
+__msg("4: (0f) r3 += r3 {{.*}} R3=scalar()")
__retval(0)
__naked void add64_partial_overflow(void)
{
@@ -1416,7 +1416,7 @@ __naked void add64_partial_overflow(void)
SEC("socket")
__description("32-bit addition overflow, all outcomes overflow")
__success __log_level(2)
-__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void add32_full_overflow(void)
{
@@ -1436,7 +1436,7 @@ __naked void add32_full_overflow(void)
SEC("socket")
__description("32-bit addition, partial overflow, result in unbounded u32 bounds")
__success __log_level(2)
-__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void add32_partial_overflow(void)
{
@@ -1456,7 +1456,7 @@ __naked void add32_partial_overflow(void)
SEC("socket")
__description("64-bit subtraction, all outcomes underflow")
__success __log_level(2)
-__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)")
+__msg("6: (1f) r3 -= r1 {{.*}} R3=scalar(umin=1,umax=0x8000000000000000)")
__retval(0)
__naked void sub64_full_overflow(void)
{
@@ -1477,7 +1477,7 @@ __naked void sub64_full_overflow(void)
SEC("socket")
__description("64-bit subtraction, partial overflow, result in unbounded reg")
__success __log_level(2)
-__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()")
+__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()")
__retval(0)
__naked void sub64_partial_overflow(void)
{
@@ -1496,7 +1496,7 @@ __naked void sub64_partial_overflow(void)
SEC("socket")
__description("32-bit subtraction overflow, all outcomes underflow")
__success __log_level(2)
-__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
+__msg("5: (1c) w3 -= w1 {{.*}} R3=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void sub32_full_overflow(void)
{
@@ -1517,7 +1517,7 @@ __naked void sub32_full_overflow(void)
SEC("socket")
__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds")
__success __log_level(2)
-__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__msg("3: (1c) w3 -= w2 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
__retval(0)
__naked void sub32_partial_overflow(void)
{
@@ -1617,7 +1617,7 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds deduction cross sign boundary, positive overlap")
__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
-__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
__retval(0)
__naked void bounds_deduct_positive_overlap(void)
{
@@ -1650,7 +1650,7 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds deduction cross sign boundary, two overlaps")
__failure __flag(BPF_F_TEST_REG_INVARIANTS)
-__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
__msg("frame pointer is read only")
__naked void bounds_deduct_two_overlaps(void)
{
@@ -1668,4 +1668,45 @@ l0_%=: r0 = 0; \
: __clobber_all);
}
+SEC("socket")
+__description("dead jne branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jne_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 != r0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jeq branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jeq_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 == r0 goto +1; \
+ exit; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index c258b0722e04..fb4fa465d67c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -660,19 +660,24 @@ __naked void may_goto_interaction_x86_64(void)
SEC("raw_tp")
__arch_arm64
-__log_level(4) __msg("stack depth 16")
-/* may_goto counter at -16 */
-__xlated("0: *(u64 *)(r10 -16) =")
-__xlated("1: r1 = 1")
-__xlated("2: call bpf_get_smp_processor_id")
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("3: call bpf_get_smp_processor_id")
/* may_goto expansion starts */
-__xlated("3: r11 = *(u64 *)(r10 -16)")
-__xlated("4: if r11 == 0x0 goto pc+3")
-__xlated("5: r11 -= 1")
-__xlated("6: *(u64 *)(r10 -16) = r11")
+__xlated("4: r11 = *(u64 *)(r10 -24)")
+__xlated("5: if r11 == 0x0 goto pc+6")
+__xlated("6: r11 -= 1")
+__xlated("7: if r11 != 0x0 goto pc+2")
+__xlated("8: r11 = -24")
+__xlated("9: call unknown")
+__xlated("10: *(u64 *)(r10 -24) = r11")
/* may_goto expansion ends */
-__xlated("7: *(u64 *)(r10 -8) = r1")
-__xlated("8: exit")
+__xlated("11: *(u64 *)(r10 -8) = r1")
+__xlated("12: exit")
__success
__naked void may_goto_interaction_arm64(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index 424463094760..5ebf7d9bcc55 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -5,8 +5,6 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-
SEC("tc")
__description("context stores via BPF_ATOMIC")
__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
@@ -264,4 +262,34 @@ narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
unaligned_access("flow_dissector", __sk_buff, data);
unaligned_access("netfilter", bpf_nf_ctx, skb);
+#define padding_access(type, ctx, prev_field, sz) \
+ SEC(type) \
+ __description("access on " #ctx " padding after " #prev_field) \
+ __naked void padding_ctx_access_##ctx(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sz * 8), \
+ __imm_const(off, offsetofend(struct ctx, prev_field)) \
+ : __clobber_all); \
+ }
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/bind4", bpf_sock_addr, msg_src_ip6[3], 4);
+
+__success
+padding_access("sk_lookup", bpf_sk_lookup, remote_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("tc", __sk_buff, tstamp_type, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("sk_reuseport", sk_reuseport_md, hash, 4);
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 181da86ba5f0..6630a92b1b47 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -215,7 +215,7 @@ __weak int subprog_untrusted(const volatile struct task_struct *restrict task __
SEC("tp_btf/sys_enter")
__success
__log_level(2)
-__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()")
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
__msg("Func#1 ('subprog_untrusted') is global and assumed valid.")
__msg("Validating subprog_untrusted() func#1...")
__msg(": R1=untrusted_ptr_task_struct")
@@ -278,7 +278,7 @@ __weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted)
SEC("tp_btf/sys_enter")
__success
__log_level(2)
-__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()")
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.")
__msg("Validating subprog_void_untrusted() func#1...")
__msg(": R1=rdonly_untrusted_mem(sz=0)")
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 52edee41caf6..c8494b682c31 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_arena_common.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
@@ -10,6 +11,12 @@
defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
SEC("socket")
__description("LDSX, S8")
__success __success_unpriv __retval(-2)
@@ -65,7 +72,7 @@ __naked void ldsx_s32(void)
SEC("socket")
__description("LDSX, S8 range checking, privileged")
__log_level(2) __success __retval(1)
-__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)")
+__msg("R1=scalar(smin=smin32=-128,smax=smax32=127)")
__naked void ldsx_s8_range_priv(void)
{
asm volatile (
@@ -256,6 +263,175 @@ __naked void ldsx_ctx_8(void)
: __clobber_all);
}
+SEC("syscall")
+__description("Arena LDSX Disasm")
+__success
+__arch_x86_64
+__jited("movslq 0x10(%rax,%r12), %r14")
+__jited("movswq 0x18(%rax,%r12), %r14")
+__jited("movsbq 0x20(%rax,%r12), %r14")
+__jited("movslq 0x10(%rdi,%r12), %r15")
+__jited("movswq 0x18(%rdi,%r12), %r15")
+__jited("movsbq 0x20(%rdi,%r12), %r15")
+__arch_arm64
+__jited("add x11, x7, x28")
+__jited("ldrsw x21, [x11, #0x10]")
+__jited("add x11, x7, x28")
+__jited("ldrsh x21, [x11, #0x18]")
+__jited("add x11, x7, x28")
+__jited("ldrsb x21, [x11, #0x20]")
+__jited("add x11, x0, x28")
+__jited("ldrsw x22, [x11, #0x10]")
+__jited("add x11, x0, x28")
+__jited("ldrsh x22, [x11, #0x18]")
+__jited("add x11, x0, x28")
+__jited("ldrsb x22, [x11, #0x20]")
+__naked void arena_ldsx_disasm(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = r0;"
+ "r8 = *(s32 *)(r0 + 16);"
+ "r8 = *(s16 *)(r0 + 24);"
+ "r8 = *(s8 *)(r0 + 32);"
+ "r9 = *(s32 *)(r1 + 16);"
+ "r9 = *(s16 *)(r1 + 24);"
+ "r9 = *(s8 *)(r1 + 32);"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX Exception")
+__success __retval(0)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_exception(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r0 = 0xdeadbeef;"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r0 = *(s8 *)(r0 + 0);"
+ "exit;"
+ :
+ : __imm_addr(arena)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S8")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s8(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r0 + 0);"
+#else
+ "r0 = *(s8 *)(r0 + 7);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S16")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s16(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r0 + 0);"
+#else
+ "r0 = *(s16 *)(r0 + 6);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S32")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s32(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r0 + 0);"
+#else
+ "r0 = *(s32 *)(r0 + 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
new file mode 100644
index 000000000000..c0e808509268
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, long long);
+} map SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) frame 0 insn 1 +live -24")
+__msg("(0) frame 0 insn 1 +written -8")
+__msg("(0) frame 0 insn 0 +live -8,-24")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void simple_read_simple_write(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 24);"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 1 +live -8")
+__not_msg("(0) frame 0 insn 1 +written")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 1 +live -16")
+__msg("(0) frame 0 insn 1 +written -32")
+__msg("(0) live stack update done in 2 iterations")
+__naked void read_write_join(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > 42 goto 1f;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "*(u64 *)(r10 - 40) = r0;"
+ "exit;"
+"1:"
+ "r0 = *(u64 *)(r10 - 16);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: (25) if r0 > 0x2a goto pc+1")
+__msg("7: (95) exit")
+__msg("(0) frame 0 insn 2 +written -16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("7: (95) exit")
+__not_msg("(0) frame 0 insn 2")
+__msg("(0) live stack update done in 1 iterations")
+__naked void must_write_not_same_slot(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = -8;"
+ "if r0 > 42 goto 1f;"
+ "r1 = -16;"
+"1:"
+ "r2 = r10;"
+ "r2 += r1;"
+ "*(u64 *)(r2 + 0) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 0 +written -8,-16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void must_write_not_same_type(void)
+{
+ asm volatile (
+ "*(u64*)(r10 - 8) = 0;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 != 0 goto 1f;"
+ "r0 = r10;"
+ "r0 += -16;"
+"1:"
+ "*(u64 *)(r0 + 0) = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(2,4) frame 0 insn 4 +written -8")
+__msg("(2,4) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_write(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call write_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void write_first_param(void)
+{
+ asm volatile (
+ "*(u64 *)(r1 + 0) = 7;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+/* caller_stack_read() function */
+__msg("2: .12345.... (85) call pc+4")
+__msg("5: .12345.... (85) call pc+1")
+__msg("6: 0......... (95) exit")
+/* read_first_param() function */
+__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("8: 0......... (95) exit")
+/* update for callsite at (2) */
+__msg("(2,7) frame 0 insn 7 +live -8")
+__msg("(2,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +live -8")
+__msg("(0) live stack update done in 2 iterations")
+/* update for callsite at (5) */
+__msg("(5,7) frame 0 insn 7 +live -16")
+__msg("(5,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 5 +live -16")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_read(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call read_first_param;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call read_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void read_first_param(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+/* read_first_param2() function */
+__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("10: .......... (b7) r0 = 0")
+__msg("11: 0......... (05) goto pc+0")
+__msg("12: 0......... (95) exit")
+/*
+ * The purpose of the test is to check that checkpoint in
+ * read_first_param2() stops path traversal. This will only happen if
+ * verifier understands that fp[0]-8 at insn (12) is not alive.
+ */
+__msg("12: safe")
+__msg("processed 20 insns")
+__naked void caller_stack_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto 1f;"
+ "r0 = %[map] ll;"
+"1:"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ /*
+ * fp[0]-8 is either pointer to map or a scalar,
+ * preventing state pruning at checkpoint created for call.
+ */
+ "call read_first_param2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void read_first_param2(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ /*
+ * Checkpoint at goto +0 should fire,
+ * as caller stack fp[0]-8 is not alive at this point.
+ */
+ "goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg("R1 type=scalar expected=map_ptr")
+__naked void caller_stack_pruning_callback(void)
+{
+ asm volatile (
+ "r0 = %[map] ll;"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = 2;"
+ "r2 = loop_cb ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ /*
+ * fp[0]-8 is either pointer to map or a scalar,
+ * preventing state pruning at checkpoint created for call.
+ */
+ "call %[bpf_loop];"
+ "r0 = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_loop),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void loop_cb(void)
+{
+ asm volatile (
+ /*
+ * Checkpoint at function entry should not fire, as caller
+ * stack fp[0]-8 is alive at this point.
+ */
+ "r6 = r2;"
+ "r1 = *(u64 *)(r6 + 0);"
+ "*(u64*)(r10 - 8) = 7;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ /*
+ * This should stop verifier on a second loop iteration,
+ * but only if verifier correctly maintains that fp[0]-8
+ * is still alive.
+ */
+ "*(u64 *)(r6 + 0) = 0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Because of a bug in verifier.c:compute_postorder()
+ * the program below overflowed traversal queue in that function.
+ */
+SEC("socket")
+__naked void syzbot_postorder_bug1(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 != 0 goto -1;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index e07b43b78fd2..fbdde80e7b90 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -283,4 +283,25 @@ exit_%=: \
: __clobber_all);
}
+/*
+ * This test case triggered a bug in verifier.c:maybe_exit_scc().
+ * Speculative execution path reaches stack access instruction,
+ * stops and triggers maybe_exit_scc() w/o accompanying maybe_enter_scc() call.
+ */
+SEC("socket")
+__arch_x86_64
+__caps_unpriv(CAP_BPF)
+__naked void maybe_exit_scc_bug1(void)
+{
+ asm volatile (
+ "r0 = 100;"
+"1:"
+ /* Speculative execution path reaches and stops here. */
+ "*(u64 *)(r10 - 512) = r0;"
+ /* Condition is always false, but verifier speculatively executes the true branch. */
+ "if r0 <= 0x0 goto 1b;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
index 11a079145966..e2767d27d8aa 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -70,10 +70,13 @@ __naked void bpf_map_ptr_write_rejected(void)
: __clobber_all);
}
+/* The first element of struct bpf_map is a SHA256 hash of 32 bytes, accessing
+ * into this array is valid. The opts field is now at offset 33.
+ */
SEC("socket")
__description("bpf_map_ptr: read non-existent field rejected")
__failure
-__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4")
+__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4")
__failure_unpriv
__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
__flag(BPF_F_ANY_ALIGNMENT)
@@ -82,7 +85,7 @@ __naked void read_non_existent_field_rejected(void)
asm volatile (" \
r6 = 0; \
r1 = %[map_array_48b] ll; \
- r6 = *(u32*)(r1 + 1); \
+ r6 = *(u32*)(r1 + 33); \
r0 = 1; \
exit; \
" :
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
index 3966d827f288..6d1edaef9213 100644
--- a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -9,6 +9,8 @@
SEC("raw_tp")
__description("may_goto 0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -27,6 +29,8 @@ __naked void may_goto_simple(void)
SEC("raw_tp")
__description("batch 2 of may_goto 0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -47,6 +51,8 @@ __naked void may_goto_batch_0(void)
SEC("raw_tp")
__description("may_goto batch with offsets 2/1/0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: r0 = 1")
__xlated("1: exit")
__success
@@ -69,8 +75,10 @@ __naked void may_goto_batch_1(void)
}
SEC("raw_tp")
-__description("may_goto batch with offsets 2/0 - x86_64")
+__description("may_goto batch with offsets 2/0")
__arch_x86_64
+__arch_s390x
+__arch_arm64
__xlated("0: *(u64 *)(r10 -16) = 65535")
__xlated("1: *(u64 *)(r10 -8) = 0")
__xlated("2: r11 = *(u64 *)(r10 -16)")
@@ -84,33 +92,7 @@ __xlated("9: r0 = 1")
__xlated("10: r0 = 2")
__xlated("11: exit")
__success
-__naked void may_goto_batch_2_x86_64(void)
-{
- asm volatile (
- ".8byte %[may_goto1];"
- ".8byte %[may_goto3];"
- "r0 = 1;"
- "r0 = 2;"
- "exit;"
- :
- : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
- __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
- : __clobber_all);
-}
-
-SEC("raw_tp")
-__description("may_goto batch with offsets 2/0 - arm64")
-__arch_arm64
-__xlated("0: *(u64 *)(r10 -8) = 8388608")
-__xlated("1: r11 = *(u64 *)(r10 -8)")
-__xlated("2: if r11 == 0x0 goto pc+3")
-__xlated("3: r11 -= 1")
-__xlated("4: *(u64 *)(r10 -8) = r11")
-__xlated("5: r0 = 1")
-__xlated("6: r0 = 2")
-__xlated("7: exit")
-__success
-__naked void may_goto_batch_2_arm64(void)
+__naked void may_goto_batch_2(void)
{
asm volatile (
".8byte %[may_goto1];"
diff --git a/tools/testing/selftests/bpf/progs/verifier_mul.c b/tools/testing/selftests/bpf/progs/verifier_mul.c
new file mode 100644
index 000000000000..7145fe3351d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_mul.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Nandakumar Edamana */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+/* Intended to test the abstract multiplication technique(s) used by
+ * the verifier. Using assembly to avoid compiler optimizations.
+ */
+SEC("fentry/bpf_fentry_test1")
+void BPF_PROG(mul_precise, int x)
+{
+ /* First, force the verifier to be uncertain about the value:
+ * unsigned int a = (bpf_get_prandom_u32() & 0x2) | 0x1;
+ *
+ * Assuming the verifier is using tnum, a must be tnum{.v=0x1, .m=0x2}.
+ * Then a * 0x3 would be m0m1 (m for uncertain). Added imprecision
+ * would cause the following to fail, because the required return value
+ * is 0:
+ * return (a * 0x3) & 0x4);
+ */
+ asm volatile ("\
+ call %[bpf_get_prandom_u32];\
+ r0 &= 0x2;\
+ r0 |= 0x1;\
+ r0 *= 0x3;\
+ r0 &= 0x4;\
+ if r0 != 0 goto l0_%=;\
+ r0 = 0;\
+ goto l1_%=;\
+l0_%=:\
+ r0 = 1;\
+l1_%=:\
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 73fee2aec698..1fe090cd6744 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -144,21 +144,21 @@ SEC("?raw_tp")
__success __log_level(2)
/*
* Without the bug fix there will be no history between "last_idx 3 first_idx 3"
- * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * and "parent state regs=" lines. "R0=6" parts are here to help anchor
* expected log messages to the one specific mark_chain_precision operation.
*
* This is quite fragile: if verifier checkpointing heuristic changes, this
* might need adjusting.
*/
-__msg("2: (07) r0 += 1 ; R0_w=6")
+__msg("2: (07) r0 += 1 ; R0=6")
__msg("3: (35) if r0 >= 0xa goto pc+1")
__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
-__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
-__msg("3: R0_w=6")
+__msg("mark_precise: frame0: parent state regs= stack=: R0=P4")
+__msg("3: R0=6")
__naked int state_loop_first_last_equal(void)
{
asm volatile (
@@ -233,8 +233,8 @@ __naked void bpf_cond_op_not_r10(void)
SEC("lsm.s/socket_connect")
__success __log_level(2)
-__msg("0: (b7) r0 = 1 ; R0_w=1")
-__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff")
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (84) w0 = -w0 ; R0=0xffffffff")
__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0")
__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
@@ -268,8 +268,8 @@ __naked int bpf_neg_3(void)
SEC("lsm.s/socket_connect")
__success __log_level(2)
-__msg("0: (b7) r0 = 1 ; R0_w=1")
-__msg("1: (87) r0 = -r0 ; R0_w=-1")
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (87) r0 = -r0 ; R0=-1")
__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0")
__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index 7c5e5e6d10eb..c0ce690ddb68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -349,11 +349,11 @@ __naked void precision_two_ids(void)
SEC("socket")
__success __log_level(2)
__flag(BPF_F_TEST_STATE_FREQ)
-/* check thar r0 and r6 have different IDs after 'if',
+/* check that r0 and r6 have different IDs after 'if',
* collect_linked_regs() can't tie more than 6 registers for a single insn.
*/
__msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1")
-__msg("9: (bf) r6 = r6 ; R6_w=scalar(id=2")
+__msg("9: (bf) r6 = r6 ; R6=scalar(id=2")
/* check that r{0-5} are marked precise after 'if' */
__msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0")
__msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:")
@@ -779,12 +779,12 @@ __success
__retval(0)
/* Check that verifier believes r1/r0 are zero at exit */
__log_level(2)
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
__msg("from 3 to 4")
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
/* Verify that statements to randomize upper half of r1 had not been
* generated.
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
index 0d5e56dffabb..2b4610b53382 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -1,14 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/sock.c */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
-
struct {
__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
__uint(max_entries, 1);
@@ -1073,6 +1069,48 @@ int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk)
}
__noinline
+long xdp_pull_data2(struct xdp_md *x, __u32 len)
+{
+ return bpf_xdp_pull_data(x, len);
+}
+
+__noinline
+long xdp_pull_data1(struct xdp_md *x, __u32 len)
+{
+ return xdp_pull_data2(x, len);
+}
+
+/* global function calls bpf_xdp_pull_data(), which invalidates packet
+ * pointers established before global function call.
+ */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers_from_global_func(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ xdp_pull_data1(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+/* XDP packet changing kfunc calls invalidate packet pointers */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ bpf_xdp_pull_data(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+__noinline
int tail_call(struct __sk_buff *sk)
{
bpf_tail_call_static(sk, &jmp_table, 0);
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 1e5a511e8494..7a13dbd794b2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -506,17 +506,17 @@ SEC("raw_tp")
__log_level(2)
__success
/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
-__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8=0")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
+__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0=0 R10=fp0 fp-16=0")
/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
* precise immediately; if necessary, it will be marked precise later
*/
-__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0")
/* similarly, when R2 is assigned from spilled register, it is initially
* imprecise, but will be marked precise later once it is used in precise context
*/
-__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0")
+__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2=0 R10=fp0 fp-16=0")
__msg("11: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
@@ -598,7 +598,7 @@ __log_level(2)
__success
/* fp-4 is STACK_ZERO */
__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
-__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0000????")
__msg("5: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
@@ -640,25 +640,25 @@ SEC("raw_tp")
__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
__success
/* make sure fp-8 is IMPRECISE fake register spill */
-__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1")
+__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8=1")
/* and fp-16 is spilled IMPRECISE const reg */
-__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1")
+__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1")
+__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2=1 R10=fp0 fp-8=1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1")
+__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)")
@@ -668,12 +668,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
/* now both fp-8 and fp-16 are precise, very good */
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision(void)
{
asm volatile (
@@ -719,22 +719,22 @@ __success
/* make sure fp-8 is 32-bit FAKE subregister spill */
__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1")
+__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=????1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1")
+__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2=1 R10=fp0 fp-8=????1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1")
+__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)")
@@ -743,12 +743,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2
__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision_subreg(void)
{
asm volatile (
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 9d415f7ce599..ac3e418c2a96 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -105,7 +105,7 @@ __msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit")
__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int fp_precise_subprog_result(void)
{
asm volatile (
@@ -141,7 +141,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1")
* anyways, at which point we'll break precision chain
*/
__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int sneaky_fp_precise_subprog_result(void)
{
asm volatile (
@@ -681,7 +681,7 @@ __msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1")
__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
index 1d36d01b746e..f345466bca68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_var_off.c
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -114,8 +114,8 @@ __naked void stack_write_priv_vs_unpriv(void)
}
/* Similar to the previous test, but this time also perform a read from the
- * address written to with a variable offset. The read is allowed, showing that,
- * after a variable-offset write, a priviledged program can read the slots that
+ * address written to with a variable offet. The read is allowed, showing that,
+ * after a variable-offset write, a privileged program can read the slots that
* were in the range of that write (even if the verifier doesn't actually know if
* the slot being read was really written to or not.
*
@@ -157,7 +157,7 @@ __naked void stack_write_followed_by_read(void)
SEC("socket")
__description("variable-offset stack write clobbers spilled regs")
__failure
-/* In the priviledged case, dereferencing a spilled-and-then-filled
+/* In the privileged case, dereferencing a spilled-and-then-filled
* register is rejected because the previous variable offset stack
* write might have overwritten the spilled pointer (i.e. we lose track
* of the spilled register when we analyze the write).
diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile
index d4e50c4509c9..63c4d3f6a12f 100644
--- a/tools/testing/selftests/bpf/test_kmods/Makefile
+++ b/tools/testing/selftests/bpf/test_kmods/Makefile
@@ -8,7 +8,7 @@ Q = @
endif
MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o)))
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
new file mode 100644
index 000000000000..769206fc70e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/prandom.h>
+#include <asm/rqspinlock.h>
+#include <linux/perf_event.h>
+#include <linux/kthread.h>
+#include <linux/atomic.h>
+#include <linux/slab.h>
+
+static struct perf_event_attr hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 1,
+ .sample_period = 100000,
+};
+
+static rqspinlock_t lock_a;
+static rqspinlock_t lock_b;
+
+static struct perf_event **rqsl_evts;
+static int rqsl_nevts;
+
+static bool test_ab = false;
+module_param(test_ab, bool, 0644);
+MODULE_PARM_DESC(test_ab, "Test ABBA situations instead of AA situations");
+
+static struct task_struct **rqsl_threads;
+static int rqsl_nthreads;
+static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0);
+
+static int pause = 0;
+
+static bool nmi_locks_a(int cpu)
+{
+ return (cpu & 1) && test_ab;
+}
+
+static int rqspinlock_worker_fn(void *arg)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ int ret;
+
+ if (cpu) {
+ atomic_inc(&rqsl_ready_cpus);
+
+ while (!kthread_should_stop()) {
+ if (READ_ONCE(pause)) {
+ msleep(1000);
+ continue;
+ }
+ if (nmi_locks_a(cpu))
+ ret = raw_res_spin_lock_irqsave(&lock_b, flags);
+ else
+ ret = raw_res_spin_lock_irqsave(&lock_a, flags);
+ mdelay(20);
+ if (nmi_locks_a(cpu) && !ret)
+ raw_res_spin_unlock_irqrestore(&lock_b, flags);
+ else if (!ret)
+ raw_res_spin_unlock_irqrestore(&lock_a, flags);
+ cpu_relax();
+ }
+ return 0;
+ }
+
+ while (!kthread_should_stop()) {
+ int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0;
+ int ready = atomic_read(&rqsl_ready_cpus);
+
+ if (ready == expected && !READ_ONCE(pause)) {
+ for (int i = 0; i < rqsl_nevts; i++)
+ perf_event_enable(rqsl_evts[i]);
+ pr_err("Waiting 5 secs to pause the test\n");
+ msleep(1000 * 5);
+ WRITE_ONCE(pause, 1);
+ pr_err("Paused the test\n");
+ } else {
+ msleep(1000);
+ cpu_relax();
+ }
+ }
+ return 0;
+}
+
+static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ int ret;
+
+ if (!cpu || READ_ONCE(pause))
+ return;
+
+ if (nmi_locks_a(cpu))
+ ret = raw_res_spin_lock_irqsave(&lock_a, flags);
+ else
+ ret = raw_res_spin_lock_irqsave(test_ab ? &lock_b : &lock_a, flags);
+
+ mdelay(10);
+
+ if (nmi_locks_a(cpu) && !ret)
+ raw_res_spin_unlock_irqrestore(&lock_a, flags);
+ else if (!ret)
+ raw_res_spin_unlock_irqrestore(test_ab ? &lock_b : &lock_a, flags);
+}
+
+static void free_rqsl_threads(void)
+{
+ int i;
+
+ if (rqsl_threads) {
+ for_each_online_cpu(i) {
+ if (rqsl_threads[i])
+ kthread_stop(rqsl_threads[i]);
+ }
+ kfree(rqsl_threads);
+ }
+}
+
+static void free_rqsl_evts(void)
+{
+ int i;
+
+ if (rqsl_evts) {
+ for (i = 0; i < rqsl_nevts; i++) {
+ if (rqsl_evts[i])
+ perf_event_release_kernel(rqsl_evts[i]);
+ }
+ kfree(rqsl_evts);
+ }
+}
+
+static int bpf_test_rqspinlock_init(void)
+{
+ int i, ret;
+ int ncpus = num_online_cpus();
+
+ pr_err("Mode = %s\n", test_ab ? "ABBA" : "AA");
+
+ if (ncpus < 3)
+ return -ENOTSUPP;
+
+ raw_res_spin_lock_init(&lock_a);
+ raw_res_spin_lock_init(&lock_b);
+
+ rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL);
+ if (!rqsl_evts)
+ return -ENOMEM;
+ rqsl_nevts = ncpus - 1;
+
+ for (i = 1; i < ncpus; i++) {
+ struct perf_event *e;
+
+ e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL);
+ if (IS_ERR(e)) {
+ ret = PTR_ERR(e);
+ goto err_perf_events;
+ }
+ rqsl_evts[i - 1] = e;
+ }
+
+ rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL);
+ if (!rqsl_threads) {
+ ret = -ENOMEM;
+ goto err_perf_events;
+ }
+ rqsl_nthreads = ncpus;
+
+ for_each_online_cpu(i) {
+ struct task_struct *t;
+
+ t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto err_threads_create;
+ }
+ kthread_bind(t, i);
+ rqsl_threads[i] = t;
+ wake_up_process(t);
+ }
+ return 0;
+
+err_threads_create:
+ free_rqsl_threads();
+err_perf_events:
+ free_rqsl_evts();
+ return ret;
+}
+
+module_init(bpf_test_rqspinlock_init);
+
+static void bpf_test_rqspinlock_exit(void)
+{
+ free_rqsl_threads();
+ free_rqsl_evts();
+}
+
+module_exit(bpf_test_rqspinlock_exit);
+
+MODULE_AUTHOR("Kumar Kartikeya Dwivedi");
+MODULE_DESCRIPTION("BPF rqspinlock stress test module");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index e9e918cdf31f..8074bc5f6f20 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -62,6 +62,18 @@ struct bpf_testmod_struct_arg_5 {
long d;
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
__bpf_hook_start();
noinline int
@@ -129,6 +141,20 @@ bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f,
}
noinline int
+bpf_testmod_test_union_arg_1(union bpf_testmod_union_arg_1 a, int b, int c)
+{
+ bpf_testmod_test_struct_arg_result = a.arg.a + b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_union_arg_2(int a, union bpf_testmod_union_arg_2 b)
+{
+ bpf_testmod_test_struct_arg_result = a + b.arg.a + b.arg.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
bpf_testmod_test_struct_arg_result = a->a;
return bpf_testmod_test_struct_arg_result;
@@ -218,6 +244,16 @@ __bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr)
{
}
+__bpf_kfunc struct task_struct *bpf_kfunc_ret_rcu_test(void)
+{
+ return NULL;
+}
+
+__bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size)
+{
+ return NULL;
+}
+
__bpf_kfunc struct bpf_testmod_ctx *
bpf_testmod_ctx_create(int *err)
{
@@ -398,6 +434,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bpf_testmod_struct_arg_3 *struct_arg3;
struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26};
+ union bpf_testmod_union_arg_1 union_arg1 = { .arg = {1} };
+ union bpf_testmod_union_arg_2 union_arg2 = { .arg = {2, 3} };
int i = 1;
while (bpf_testmod_return_ptr(i))
@@ -415,6 +453,9 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20,
21, 22, struct_arg5, 27);
+ (void)bpf_testmod_test_union_arg_1(union_arg1, 4, 5);
+ (void)bpf_testmod_test_union_arg_2(6, union_arg2);
+
(void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
(void)trace_bpf_testmod_test_raw_tp_null_tp(NULL);
@@ -501,14 +542,20 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
#ifdef __x86_64__
static int
+uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data)
+{
+ regs->cx = 0x87654321feebdaed;
+ return 0;
+}
+
+static int
uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
struct pt_regs *regs, __u64 *data)
{
regs->ax = 0x12345678deadbeef;
- regs->cx = 0x87654321feebdaed;
regs->r11 = (u64) -1;
- return true;
+ return 0;
}
struct testmod_uprobe {
@@ -520,6 +567,7 @@ struct testmod_uprobe {
static DEFINE_MUTEX(testmod_uprobe_mutex);
static struct testmod_uprobe uprobe = {
+ .consumer.handler = uprobe_handler,
.consumer.ret_handler = uprobe_ret_handler,
};
@@ -623,6 +671,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
@@ -1057,6 +1107,8 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
return args->a;
}
+__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id);
+
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -1097,6 +1149,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABL
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -1528,6 +1581,114 @@ static struct bpf_struct_ops testmod_st_ops = {
.owner = THIS_MODULE,
};
+struct hlist_head multi_st_ops_list;
+static DEFINE_SPINLOCK(multi_st_ops_lock);
+
+static int multi_st_ops_init(struct btf *btf)
+{
+ spin_lock_init(&multi_st_ops_lock);
+ INIT_HLIST_HEAD(&multi_st_ops_list);
+
+ return 0;
+}
+
+static int multi_st_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+
+ hlist_for_each_entry(st_ops, &multi_st_ops_list, node) {
+ if (st_ops->id == id)
+ return st_ops;
+ }
+
+ return NULL;
+}
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ int ret = -1;
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ ret = st_ops->test_1(args);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return ret;
+}
+
+static int multi_st_ops_reg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops =
+ (struct bpf_testmod_multi_st_ops *)kdata;
+ unsigned long flags;
+ int err = 0;
+ u32 id;
+
+ if (!st_ops->test_1)
+ return -EINVAL;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ if (multi_st_ops_find_nolock(id)) {
+ pr_err("multi_st_ops(id:%d) has already been registered\n", id);
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ st_ops->id = id;
+ hlist_add_head(&st_ops->node, &multi_st_ops_list);
+unlock:
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return err;
+}
+
+static void multi_st_ops_unreg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ u32 id;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ hlist_del(&st_ops->node);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+}
+
+static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = {
+ .test_1 = bpf_testmod_multi_st_ops__test_1,
+};
+
+struct bpf_struct_ops testmod_multi_st_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = multi_st_ops_init,
+ .init_member = multi_st_ops_init_member,
+ .reg = multi_st_ops_reg,
+ .unreg = multi_st_ops_unreg,
+ .cfi_stubs = &multi_st_ops_cfi_stubs,
+ .name = "bpf_testmod_multi_st_ops",
+ .owner = THIS_MODULE,
+};
+
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@@ -1550,6 +1711,7 @@ static int bpf_testmod_init(void)
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3);
ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops);
+ ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops);
ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
ARRAY_SIZE(bpf_testmod_dtors),
THIS_MODULE);
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index c9fab51f16e2..f6e492f9d042 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -116,4 +116,10 @@ struct bpf_testmod_st_ops {
struct module *owner;
};
+struct bpf_testmod_multi_st_ops {
+ int (*test_1)(struct st_ops_args *args);
+ struct hlist_node node;
+ int id;
+};
+
#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index b58817938deb..4df6fa6a92cb 100644
--- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -158,5 +158,9 @@ void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym;
void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym;
void bpf_kfunc_trusted_num_test(int *ptr) __ksym;
void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
+struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym;
+int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym;
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index 4694422aa76c..88e4aeab21b7 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
/* Let's try detach it before it was ever attached */
ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
- if (ret != -1 || errno != ENOENT) {
+ if (ret != -ENOENT) {
printf("bpf_prog_detach2 not attached should fail: %m\n");
return 1;
}
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 78423cf89e01..74ecc281bb8c 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <linux/capability.h>
#include <stdlib.h>
-#include <regex.h>
#include <test_progs.h>
#include <bpf/btf.h>
@@ -20,10 +19,12 @@
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg="
#define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated="
#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv="
#define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
@@ -38,6 +39,10 @@
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
+#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr="
+#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
+#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
+#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
/* Warning: duplicated in bpf_misc.h */
#define POINTER_VALUE 0xbadcafe
@@ -61,24 +66,14 @@ enum load_mode {
NO_JITED = 1 << 1,
};
-struct expect_msg {
- const char *substr; /* substring match */
- regex_t regex;
- bool is_regex;
- bool on_next_line;
-};
-
-struct expected_msgs {
- struct expect_msg *patterns;
- size_t cnt;
-};
-
struct test_subspec {
char *name;
bool expect_failure;
struct expected_msgs expect_msgs;
struct expected_msgs expect_xlated;
struct expected_msgs jited;
+ struct expected_msgs stderr;
+ struct expected_msgs stdout;
int retval;
bool execute;
__u64 caps;
@@ -139,6 +134,10 @@ static void free_test_spec(struct test_spec *spec)
free_msgs(&spec->unpriv.expect_xlated);
free_msgs(&spec->priv.jited);
free_msgs(&spec->unpriv.jited);
+ free_msgs(&spec->unpriv.stderr);
+ free_msgs(&spec->priv.stderr);
+ free_msgs(&spec->unpriv.stdout);
+ free_msgs(&spec->priv.stdout);
free(spec->priv.name);
free(spec->unpriv.name);
@@ -206,7 +205,8 @@ static int compile_regex(const char *pattern, regex_t *regex)
return 0;
}
-static int __push_msg(const char *pattern, bool on_next_line, struct expected_msgs *msgs)
+static int __push_msg(const char *pattern, bool on_next_line, bool negative,
+ struct expected_msgs *msgs)
{
struct expect_msg *msg;
void *tmp;
@@ -222,6 +222,7 @@ static int __push_msg(const char *pattern, bool on_next_line, struct expected_ms
msg = &msgs->patterns[msgs->cnt];
msg->on_next_line = on_next_line;
msg->substr = pattern;
+ msg->negative = negative;
msg->is_regex = false;
if (strstr(pattern, "{{")) {
err = compile_regex(pattern, &msg->regex);
@@ -240,16 +241,16 @@ static int clone_msgs(struct expected_msgs *from, struct expected_msgs *to)
for (i = 0; i < from->cnt; i++) {
msg = &from->patterns[i];
- err = __push_msg(msg->substr, msg->on_next_line, to);
+ err = __push_msg(msg->substr, msg->on_next_line, msg->negative, to);
if (err)
return err;
}
return 0;
}
-static int push_msg(const char *substr, struct expected_msgs *msgs)
+static int push_msg(const char *substr, bool negative, struct expected_msgs *msgs)
{
- return __push_msg(substr, false, msgs);
+ return __push_msg(substr, false, negative, msgs);
}
static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct expected_msgs *msgs)
@@ -260,7 +261,7 @@ static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct exp
*on_next_line = false;
return 0;
}
- err = __push_msg(regex_str, *on_next_line, msgs);
+ err = __push_msg(regex_str, *on_next_line, false, msgs);
if (err)
return err;
*on_next_line = true;
@@ -374,6 +375,7 @@ enum arch {
ARCH_X86_64 = 0x2,
ARCH_ARM64 = 0x4,
ARCH_RISCV64 = 0x8,
+ ARCH_S390X = 0x10,
};
static int get_current_arch(void)
@@ -384,6 +386,8 @@ static int get_current_arch(void)
return ARCH_ARM64;
#elif defined(__riscv) && __riscv_xlen == 64
return ARCH_RISCV64;
+#elif defined(__s390x__)
+ return ARCH_S390X;
#endif
return ARCH_UNKNOWN;
}
@@ -404,6 +408,10 @@ static int parse_test_spec(struct test_loader *tester,
bool xlated_on_next_line = true;
bool unpriv_jit_on_next_line;
bool jit_on_next_line;
+ bool stderr_on_next_line = true;
+ bool unpriv_stderr_on_next_line = true;
+ bool stdout_on_next_line = true;
+ bool unpriv_stdout_on_next_line = true;
bool collect_jit = false;
int func_id, i, err = 0;
u32 arch_mask = 0;
@@ -465,12 +473,22 @@ static int parse_test_spec(struct test_loader *tester,
spec->auxiliary = true;
spec->mode_mask |= UNPRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) {
- err = push_msg(msg, &spec->priv.expect_msgs);
+ err = push_msg(msg, false, &spec->priv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) {
+ err = push_msg(msg, true, &spec->priv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) {
- err = push_msg(msg, &spec->unpriv.expect_msgs);
+ err = push_msg(msg, false, &spec->unpriv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) {
+ err = push_msg(msg, true, &spec->unpriv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
@@ -565,8 +583,10 @@ static int parse_test_spec(struct test_loader *tester,
arch = ARCH_ARM64;
} else if (strcmp(val, "RISCV64") == 0) {
arch = ARCH_RISCV64;
+ } else if (strcmp(val, "s390x") == 0) {
+ arch = ARCH_S390X;
} else {
- PRINT_FAIL("bad arch spec: '%s'", val);
+ PRINT_FAIL("bad arch spec: '%s'\n", val);
err = -EINVAL;
goto cleanup;
}
@@ -593,6 +613,26 @@ static int parse_test_spec(struct test_loader *tester,
err = -EINVAL;
goto cleanup;
}
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) {
+ err = push_disasm_msg(msg, &stderr_on_next_line,
+ &spec->priv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stderr_on_next_line,
+ &spec->unpriv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) {
+ err = push_disasm_msg(msg, &stdout_on_next_line,
+ &spec->priv.stdout);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stdout_on_next_line,
+ &spec->unpriv.stdout);
+ if (err)
+ goto cleanup;
}
}
@@ -646,6 +686,10 @@ static int parse_test_spec(struct test_loader *tester,
clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated);
if (spec->unpriv.jited.cnt == 0)
clone_msgs(&spec->priv.jited, &spec->unpriv.jited);
+ if (spec->unpriv.stderr.cnt == 0)
+ clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr);
+ if (spec->unpriv.stdout.cnt == 0)
+ clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout);
}
spec->valid = true;
@@ -707,44 +751,155 @@ static void emit_jited(const char *jited, bool force)
fprintf(stdout, "JITED:\n=============\n%s=============\n", jited);
}
-static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
- void (*emit_fn)(const char *buf, bool force))
+static void emit_stderr(const char *stderr, bool force)
{
- const char *log = log_buf, *prev_match;
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr);
+}
+
+static void emit_stdout(const char *bpf_stdout, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout);
+}
+
+static const char *match_msg(struct expect_msg *msg, const char **log)
+{
+ const char *match = NULL;
regmatch_t reg_match[1];
- int prev_match_line;
- int match_line;
- int i, j, err;
+ int err;
+
+ if (!msg->is_regex) {
+ match = strstr(*log, msg->substr);
+ if (match)
+ *log = match + strlen(msg->substr);
+ } else {
+ err = regexec(&msg->regex, *log, 1, reg_match, 0);
+ if (err == 0) {
+ match = *log + reg_match[0].rm_so;
+ *log += reg_match[0].rm_eo;
+ }
+ }
+ return match;
+}
+
+static int count_lines(const char *start, const char *end)
+{
+ const char *tmp;
+ int n = 0;
+
+ for (tmp = start; tmp < end; ++tmp)
+ if (*tmp == '\n')
+ n++;
+ return n;
+}
+
+struct match {
+ const char *start;
+ const char *end;
+ int line;
+};
+
+/*
+ * Positive messages are matched sequentially, each next message
+ * is looked for starting from the end of a previous matched one.
+ */
+static void match_positive_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *prev_match;
+ int i, line;
- prev_match_line = -1;
- match_line = 0;
prev_match = log;
+ line = 0;
for (i = 0; i < msgs->cnt; i++) {
struct expect_msg *msg = &msgs->patterns[i];
- const char *match = NULL, *pat_status;
- bool wrong_line = false;
-
- if (!msg->is_regex) {
- match = strstr(log, msg->substr);
- if (match)
- log = match + strlen(msg->substr);
- } else {
- err = regexec(&msg->regex, log, 1, reg_match, 0);
- if (err == 0) {
- match = log + reg_match[0].rm_so;
- log += reg_match[0].rm_eo;
+ const char *match = NULL;
+
+ if (msg->negative)
+ continue;
+
+ match = match_msg(msg, &log);
+ if (match) {
+ line += count_lines(prev_match, match);
+ matches[i].start = match;
+ matches[i].end = log;
+ matches[i].line = line;
+ prev_match = match;
+ }
+ }
+}
+
+/*
+ * Each negative messages N located between positive messages P1 and P2
+ * is matched in the span P1.end .. P2.start. Consequently, negative messages
+ * are unordered within the span.
+ */
+static void match_negative_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *start = log, *end, *next, *match;
+ const char *log_end = log + strlen(log);
+ int i, j, next_positive;
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+
+ /* positive message bumps span start */
+ if (!msg->negative) {
+ start = matches[i].end ?: start;
+ continue;
+ }
+
+ /* count stride of negative patterns and adjust span end */
+ end = log_end;
+ for (next_positive = i + 1; next_positive < msgs->cnt; next_positive++) {
+ if (!msgs->patterns[next_positive].negative) {
+ end = matches[next_positive].start;
+ break;
}
}
- if (match) {
- for (; prev_match < match; ++prev_match)
- if (*prev_match == '\n')
- ++match_line;
- wrong_line = msg->on_next_line && prev_match_line >= 0 &&
- prev_match_line + 1 != match_line;
+ /* try matching negative messages within identified span */
+ for (j = i; j < next_positive; j++) {
+ next = start;
+ match = match_msg(msg, &next);
+ if (match && next <= end) {
+ matches[j].start = match;
+ matches[j].end = next;
+ }
}
- if (!match || wrong_line) {
+ /* -1 to account for i++ */
+ i = next_positive - 1;
+ }
+}
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force))
+{
+ struct match matches[msgs->cnt];
+ struct match *prev_match = NULL;
+ int i, j;
+
+ memset(matches, 0, sizeof(*matches) * msgs->cnt);
+ match_positive_msgs(log_buf, msgs, matches);
+ match_negative_msgs(log_buf, msgs, matches);
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+ struct match *match = &matches[i];
+ const char *pat_status;
+ bool unexpected;
+ bool wrong_line;
+ bool no_match;
+
+ no_match = !msg->negative && !match->start;
+ wrong_line = !msg->negative &&
+ msg->on_next_line &&
+ prev_match && prev_match->line + 1 != match->line;
+ unexpected = msg->negative && match->start;
+ if (no_match || wrong_line || unexpected) {
PRINT_FAIL("expect_msg\n");
if (env.verbosity == VERBOSE_NONE)
emit_fn(log_buf, true /*force*/);
@@ -754,8 +909,10 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
pat_status = "MATCHED ";
else if (wrong_line)
pat_status = "WRONG LINE";
- else
+ else if (no_match)
pat_status = "EXPECTED ";
+ else
+ pat_status = "UNEXPECTED";
msg = &msgs->patterns[j];
fprintf(stderr, "%s %s: '%s'\n",
pat_status,
@@ -765,12 +922,13 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
if (wrong_line) {
fprintf(stderr,
"expecting match at line %d, actual match is at line %d\n",
- prev_match_line + 1, match_line);
+ prev_match->line + 1, match->line);
}
break;
}
- prev_match_line = match_line;
+ if (!msg->negative)
+ prev_match = match;
}
}
@@ -929,6 +1087,19 @@ out:
return err;
}
+/* Read the bpf stream corresponding to the stream_id */
+static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz)
+{
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret;
+
+ ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz, &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ text[ret] = '\0';
+
+ return ret;
+}
+
/* this function is forced noinline and has short generic name to look better
* in test_progs output (in case of a failure)
*/
@@ -1083,7 +1254,7 @@ void run_subtest(struct test_loader *tester,
link = bpf_map__attach_struct_ops(map);
if (!link) {
PRINT_FAIL("bpf_map__attach_struct_ops failed for map %s: err=%d\n",
- bpf_map__name(map), err);
+ bpf_map__name(map), -errno);
goto tobj_cleanup;
}
links[links_cnt++] = link;
@@ -1103,6 +1274,31 @@ void run_subtest(struct test_loader *tester,
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
}
+
+ if (subspec->stderr.cnt) {
+ err = get_stream(2, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stderr(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr);
+ }
+
+ if (subspec->stdout.cnt) {
+ err = get_stream(1, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stdout(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout);
+ }
+
/* redo bpf_map__attach_struct_ops for each test */
while (links_cnt > 0)
bpf_link__destroy(links[--links_cnt]);
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 309d9d4a8ace..02a85dda30e6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -14,12 +14,14 @@
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
+#include <linux/keyctl.h>
#include <sys/un.h>
#include <bpf/btf.h>
#include <time.h>
#include "json_writer.h"
#include "network_helpers.h"
+#include "verification_cert.h"
/* backtrace() and backtrace_symbols_fd() are glibc specific,
* use header file when glibc is available and provide stub
@@ -1928,6 +1930,13 @@ static void free_test_states(void)
}
}
+static __u32 register_session_key(const char *key_data, size_t key_data_size)
+{
+ return syscall(__NR_add_key, "asymmetric", "libbpf_session_key",
+ (const void *)key_data, key_data_size,
+ KEY_SPEC_SESSION_KEYRING);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -1961,6 +1970,10 @@ int main(int argc, char **argv)
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
+ err = register_session_key((const char *)test_progs_verification_cert,
+ test_progs_verification_cert_len);
+ if (err < 0)
+ return err;
traffic_monitor_set_print(traffic_monitor_print_fn);
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index df2222a1806f..eebfc18cdcd2 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -7,6 +7,7 @@
#include <errno.h>
#include <string.h>
#include <assert.h>
+#include <regex.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
@@ -546,4 +547,20 @@ extern void test_loader_fini(struct test_loader *tester);
test_loader_fini(&tester); \
})
+struct expect_msg {
+ const char *substr; /* substring match */
+ regex_t regex;
+ bool is_regex;
+ bool on_next_line;
+ bool negative;
+};
+
+struct expected_msgs {
+ struct expect_msg *patterns;
+ size_t cnt;
+};
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force));
+
#endif /* __TEST_PROGS_H */
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index fd2da2234cc9..76568db7a664 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1372,7 +1372,7 @@ run:
} else
fprintf(stderr, "unknown test\n");
out:
- /* Detatch and zero all the maps */
+ /* Detach and zero all the maps */
bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS);
for (i = 0; i < ARRAY_SIZE(links); i++) {
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 595194453ff8..35b4893ccdf8 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -15,20 +15,18 @@
#include <bpf/libbpf.h>
#include <sys/ioctl.h>
#include <linux/rtnetlink.h>
-#include <signal.h>
#include <linux/perf_event.h>
-#include <linux/err.h>
-#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "test_tcpnotify.h"
-#include "trace_helpers.h"
#include "testing_helpers.h"
#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
pthread_t tid;
+static bool exit_thread;
+
int rx_callbacks;
static void dummyfn(void *ctx, int cpu, void *data, __u32 size)
@@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb)
{
int err;
- while (1) {
+ while (!exit_thread) {
err = perf_buffer__poll(pb, 100);
if (err < 0 && err != -EINTR) {
printf("failed perf_buffer__poll: %d\n", err);
@@ -78,15 +76,10 @@ int main(int argc, char **argv)
int error = EXIT_FAILURE;
struct bpf_object *obj;
char test_script[80];
- cpu_set_t cpuset;
__u32 key = 0;
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
-
cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
@@ -151,6 +144,13 @@ int main(int argc, char **argv)
sleep(10);
+ exit_thread = true;
+ int ret = pthread_join(tid, NULL);
+ if (ret) {
+ printf("FAILED: pthread_join\n");
+ goto err;
+ }
+
if (verify_result(&g)) {
printf("FAILED: Wrong stats Expected %d calls, got %d\n",
g.ncalls, rx_callbacks);
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 65aafe0003db..62db060298a4 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -241,4 +241,6 @@ done
if [ $failures -eq 0 ]; then
echo "All tests successful!"
+else
+ exit 1
fi
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 5e9f16683be5..16eb37e5bad6 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -399,7 +399,7 @@ int unload_module(const char *name, bool verbose)
return 0;
}
-int load_module(const char *path, bool verbose)
+static int __load_module(const char *path, const char *param_values, bool verbose)
{
int fd;
@@ -411,7 +411,7 @@ int load_module(const char *path, bool verbose)
fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno);
return -ENOENT;
}
- if (finit_module(fd, "", 0)) {
+ if (finit_module(fd, param_values, 0)) {
fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno);
close(fd);
return -EINVAL;
@@ -423,6 +423,16 @@ int load_module(const char *path, bool verbose)
return 0;
}
+int load_module_params(const char *path, const char *param_values, bool verbose)
+{
+ return __load_module(path, param_values, verbose);
+}
+
+int load_module(const char *path, bool verbose)
+{
+ return __load_module(path, "", verbose);
+}
+
int unload_bpf_testmod(bool verbose)
{
return unload_module("bpf_testmod", verbose);
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 46d7f7089f63..eb20d3772218 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -39,6 +39,7 @@ int kern_sync_rcu(void);
int finit_module(int fd, const char *param_values, int flags);
int delete_module(const char *name, int flags);
int load_module(const char *path, bool verbose);
+int load_module_params(const char *path, const char *param_values, bool verbose);
int unload_module(const char *name, bool verbose);
static inline __u64 get_time_ns(void)
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 81943c6254e6..171987627f3a 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -17,7 +17,9 @@
#include <linux/limits.h>
#include <libelf.h>
#include <gelf.h>
+#include "bpf/hashmap.h"
#include "bpf/libbpf_internal.h"
+#include "bpf_util.h"
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
@@ -519,3 +521,235 @@ void read_trace_pipe(void)
{
read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
}
+
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
+
+static const char * const trace_blacklist[] = {
+ "migrate_disable",
+ "migrate_enable",
+ "rcu_read_unlock_strict",
+ "preempt_count_add",
+ "preempt_count_sub",
+ "__rcu_read_lock",
+ "__rcu_read_unlock",
+ "bpf_get_numa_node_id",
+};
+
+static bool skip_entry(char *name)
+{
+ int i;
+
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits - arch_cpu_idle
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) {
+ if (!strcmp(name, trace_blacklist[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/* Do comparison by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
+{
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
+ struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
+ char buf[256];
+ FILE *f;
+ int err = 0;
+
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ksyms)
+ return -EINVAL;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable to trace.
+ * Filtering out duplicates by using hashmap__add, which won't
+ * add existing entry.
+ */
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ if (IS_ERR(map)) {
+ err = libbpf_get_error(map);
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
+ if (err == -EEXIST) {
+ err = 0;
+ continue;
+ }
+ if (err)
+ goto error;
+
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err)
+ goto error;
+
+ syms[cnt++] = ksym_name;
+ }
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ hashmap__free(map);
+ if (err)
+ free(syms);
+ return err;
+}
+
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+ /* In my local setup, the number of entries is 50k+ so Let us initially
+ * allocate space to hold 64k entries. If 64k is not enough, incrementally
+ * increase 1k each time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt);
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
+ }
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 2ce873c9f9aa..9437bdd4afa5 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr);
int read_build_id(const char *path, char *build_id, size_t size);
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel);
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
+
#endif
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
new file mode 100644
index 000000000000..549d1f774810
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * This single-header library defines a collection of variadic macros for
+ * defining and triggering USDTs (User Statically-Defined Tracepoints):
+ *
+ * - For USDTs without associated semaphore:
+ * USDT(group, name, args...)
+ *
+ * - For USDTs with implicit (transparent to the user) semaphore:
+ * USDT_WITH_SEMA(group, name, args...)
+ * USDT_IS_ACTIVE(group, name)
+ *
+ * - For USDTs with explicit (user-defined and provided) semaphore:
+ * USDT_WITH_EXPLICIT_SEMA(sema, group, name, args...)
+ * USDT_SEMA_IS_ACTIVE(sema)
+ *
+ * all of which emit a NOP instruction into the instruction stream, and so
+ * have *zero* overhead for the surrounding code. USDTs are identified by
+ * a combination of `group` and `name` identifiers, which is used by external
+ * tracing tooling (tracers) for identifying exact USDTs of interest.
+ *
+ * USDTs can have an associated (2-byte) activity counter (USDT semaphore),
+ * automatically maintained by Linux kernel whenever any correctly written
+ * BPF-based tracer is attached to the USDT. This USDT semaphore can be used
+ * to check whether there is a need to do any extra data collection and
+ * processing for a given USDT (if necessary), and otherwise avoid extra work
+ * for a common case of USDT not being traced ("active").
+ *
+ * See documentation for USDT_WITH_SEMA()/USDT_IS_ACTIVE() or
+ * USDT_WITH_EXPLICIT_SEMA()/USDT_SEMA_IS_ACTIVE() APIs below for details on
+ * working with USDTs with implicitly or explicitly associated
+ * USDT semaphores, respectively.
+ *
+ * There is also some additional data recorded into an auxiliary note
+ * section. The data in the note section describes the operands, in terms of
+ * size and location, used by tracing tooling to know where to find USDT
+ * arguments. Each location is encoded as an assembler operand string.
+ * Tracing tools (bpftrace and BPF-based tracers, systemtap, etc) insert
+ * breakpoints on top of the nop, and decode the location operand-strings,
+ * like an assembler, to find the values being passed.
+ *
+ * The operand strings are selected by the compiler for each operand.
+ * They are constrained by inline-assembler codes.The default is:
+ *
+ * #define USDT_ARG_CONSTRAINT nor
+ *
+ * This is a good default if the operands tend to be integral and
+ * moderate in number (smaller than number of registers). In other
+ * cases, the compiler may report "'asm' requires impossible reload" or
+ * similar. In this case, consider simplifying the macro call (fewer
+ * and simpler operands), reduce optimization, or override the default
+ * constraints string via:
+ *
+ * #define USDT_ARG_CONSTRAINT g
+ * #include <usdt.h>
+ *
+ * For some historical description of USDT v3 format (the one used by this
+ * library and generally recognized and assumed by BPF-based tracing tools)
+ * see [0]. The more formal specification can be found at [1]. Additional
+ * argument constraints information can be found at [2].
+ *
+ * Original SystemTap's sys/sdt.h implementation ([3]) was used as a base for
+ * this USDT library implementation. Current implementation differs *a lot* in
+ * terms of exposed user API and general usability, which was the main goal
+ * and focus of the reimplementation work. Nevertheless, underlying recorded
+ * USDT definitions are fully binary compatible and any USDT-based tooling
+ * should work equally well with USDTs defined by either SystemTap's or this
+ * library's USDT implementation.
+ *
+ * [0] https://ecos.sourceware.org/ml/systemtap/2010-q3/msg00145.html
+ * [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ * [2] https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ * [3] https://sourceware.org/git/?p=systemtap.git;a=blob;f=includes/sys/sdt.h
+ */
+#ifndef __USDT_H
+#define __USDT_H
+
+/*
+ * Changelog:
+ *
+ * 0.1.0
+ * -----
+ * - Initial release
+ */
+#define USDT_MAJOR_VERSION 0
+#define USDT_MINOR_VERSION 1
+#define USDT_PATCH_VERSION 0
+
+/* C++20 and C23 added __VA_OPT__ as a standard replacement for non-standard `##__VA_ARGS__` extension */
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) || (defined(__cplusplus) && __cplusplus > 201703L)
+#define __usdt_va_opt 1
+#define __usdt_va_args(...) __VA_OPT__(,) __VA_ARGS__
+#else
+#define __usdt_va_args(...) , ##__VA_ARGS__
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. No USDT semaphore is
+ * associated with this USDT.
+ *
+ * Such "semaphoreless" USDTs are commonly used when there is no extra data
+ * collection or processing needed to collect and prepare USDT arguments and
+ * they are just available in the surrounding code. USDT() macro will just
+ * record their locations in CPU registers or in memory for tracing tooling to
+ * be able to access them, if necessary.
+ */
+#ifdef __usdt_va_opt
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0 __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0, ##__VA_ARGS__)
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. USDT also get an
+ * implicitly-defined associated USDT semaphore, which will be "activated" by
+ * tracing tooling and can be used to check whether USDT is being actively
+ * observed.
+ *
+ * USDTs with semaphore are commonly used when there is a need to perform
+ * additional data collection and processing to prepare USDT arguments, which
+ * otherwise might not be necessary for the rest of application logic. In such
+ * case, USDT semaphore can be used to avoid unnecessary extra work. If USDT
+ * is not traced (which is presumed to be a common situation), the associated
+ * USDT semaphore is "inactive", and so there is no need to waste resources to
+ * prepare USDT arguments. Use USDT_IS_ACTIVE(group, name) to check whether
+ * USDT is "active".
+ *
+ * N.B. There is an inherent (albeit short) gap between checking whether USDT
+ * is active and triggering corresponding USDT, in which external tracer can
+ * be attached to an USDT and activate USDT semaphore after the activity check.
+ * If such a race occurs, tracers might miss one USDT execution. Tracers are
+ * expected to accommodate such possibility and this is expected to not be
+ * a problem for applications and tracers.
+ *
+ * N.B. Implicit USDT semaphore defined by USDT_WITH_SEMA() is contained
+ * within a single executable or shared library and is not shared outside
+ * them. I.e., if you use USDT_WITH_SEMA() with the same USDT group and name
+ * identifier across executable and shared library, it will work and won't
+ * conflict, per se, but will define independent USDT semaphores, one for each
+ * shared library/executable in which USDT_WITH_SEMA(group, name) is used.
+ * That is, if you attach to this USDT in one shared library (or executable),
+ * then only USDT semaphore within that shared library (or executable) will be
+ * updated by the kernel, while other libraries (or executable) will not see
+ * activated USDT semaphore. In short, it's best to use unique USDT group:name
+ * identifiers across different shared libraries (and, equivalently, between
+ * executable and shared library). This is advanced consideration and is
+ * rarely (if ever) seen in practice, but just to avoid surprises this is
+ * called out here. (Static libraries become a part of final executable, once
+ * linked by linker, so the above considerations don't apply to them.)
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name) \
+ __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name), \
+ ##__VA_ARGS__)
+#endif
+
+struct usdt_sema { volatile unsigned short active; };
+
+/*
+ * Check if USDT with `group`:`name` identifier is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into the given USDT. It is assumed that USDT is triggered with
+ * USDT_WITH_SEMA() macro which will implicitly define associated USDT
+ * semaphore. (If one needs more control over USDT semaphore, see
+ * USDT_DEFINE_SEMA() and USDT_WITH_EXPLICIT_SEMA() macros below.)
+ *
+ * N.B. Such checks are necessarily racy and speculative. Between checking
+ * whether USDT is active and triggering the USDT itself, tracer can be
+ * detached with no notification. This race should be extremely rare and worst
+ * case should result in one-time wasted extra data collection and processing.
+ */
+#define USDT_IS_ACTIVE(group, name) ({ \
+ extern struct usdt_sema __usdt_sema_name(group, name) \
+ __usdt_asm_name(__usdt_sema_name(group, name)); \
+ __usdt_sema_implicit(__usdt_sema_name(group, name)); \
+ __usdt_sema_name(group, name).active > 0; \
+})
+
+/*
+ * APIs for working with user-defined explicit USDT semaphores.
+ *
+ * This is a less commonly used advanced API for use cases in which user needs
+ * an explicit control over (potentially shared across multiple USDTs) USDT
+ * semaphore instance. This can be used when there is a group of logically
+ * related USDTs that all need extra data collection and processing whenever
+ * any of a family of related USDTs are "activated" (i.e., traced). In such
+ * a case, all such related USDTs will be associated with the same shared USDT
+ * semaphore defined with USDT_DEFINE_SEMA() and the USDTs themselves will be
+ * triggered with USDT_WITH_EXPLICIT_SEMA() macros, taking an explicit extra
+ * USDT semaphore identifier as an extra parameter.
+ */
+
+/**
+ * Underlying C global variable name for user-defined USDT semaphore with
+ * `sema` identifier. Could be useful for debugging, but normally shouldn't be
+ * used explicitly.
+ */
+#define USDT_SEMA(sema) __usdt_sema_##sema
+
+/*
+ * Define storage for user-defined USDT semaphore `sema`.
+ *
+ * Should be used only once in non-header source file to let compiler allocate
+ * space for the semaphore variable. Just like with any other global variable.
+ *
+ * This macro can be used anywhere where global variable declaration is
+ * allowed. Just like with global variable definitions, there should be only
+ * one definition of user-defined USDT semaphore with given `sema` identifier,
+ * otherwise compiler or linker will complain about duplicate variable
+ * definition.
+ *
+ * For C++, it is allowed to use USDT_DEFINE_SEMA() both in global namespace
+ * and inside namespaces (including nested namespaces). Just make sure that
+ * USDT_DECLARE_SEMA() is placed within the namespace where this semaphore is
+ * referenced, or any of its parent namespaces, so the C++ language-level
+ * identifier is visible to the code that needs to reference the semaphore.
+ * At the lowest layer, USDT semaphores have global naming and visibility
+ * (they have a corresponding `__usdt_sema_<name>` symbol, which can be linked
+ * against from C or C++ code, if necessary). To keep it simple, putting
+ * USDT_DECLARE_SEMA() declarations into global namespaces is the simplest
+ * no-brainer solution. All these aspects are irrelevant for plain C, because
+ * C doesn't have namespaces and everything is always in the global namespace.
+ *
+ * N.B. Due to USDT metadata being recorded in non-allocatable ELF note
+ * section, it has limitations when it comes to relocations, which, in
+ * practice, means that it's not possible to correctly share USDT semaphores
+ * between main executable and shared libraries, or even between multiple
+ * shared libraries. USDT semaphore has to be contained to individual shared
+ * library or executable to avoid unpleasant surprises with half-working USDT
+ * semaphores. We enforce this by marking semaphore ELF symbols as having
+ * a hidden visibility. This is quite an advanced use case and consideration
+ * and for most users this should have no consequences whatsoever.
+ */
+#define USDT_DEFINE_SEMA(sema) \
+ struct usdt_sema __usdt_sema_sec USDT_SEMA(sema) \
+ __usdt_asm_name(USDT_SEMA(sema)) \
+ __attribute__((visibility("hidden"))) = { 0 }
+
+/*
+ * Declare extern reference to user-defined USDT semaphore `sema`.
+ *
+ * Refers to a variable defined in another compilation unit by
+ * USDT_DEFINE_SEMA() and allows to use the same USDT semaphore across
+ * multiple compilation units (i.e., .c and .cpp files).
+ *
+ * See USDT_DEFINE_SEMA() notes above for C++ language usage peculiarities.
+ */
+#define USDT_DECLARE_SEMA(sema) \
+ extern struct usdt_sema USDT_SEMA(sema) __usdt_asm_name(USDT_SEMA(sema))
+
+/*
+ * Check if user-defined USDT semaphore `sema` is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into USDT(s) associated with USDT semaphore `sema`.
+ *
+ * N.B. Such checks are necessarily racy. Between checking the state of USDT
+ * semaphore and triggering associated USDT(s), the active tracer might attach
+ * or detach. This race should be extremely rare and worst case should result
+ * in one-time missed USDT event or wasted extra data collection and
+ * processing. USDT-using tracers should be written with this in mind and is
+ * not a concern of the application defining USDTs with associated semaphore.
+ */
+#define USDT_SEMA_IS_ACTIVE(sema) (USDT_SEMA(sema).active > 0)
+
+/*
+ * Invoke USDT specified by `group` and `name` identifiers and associate
+ * explicitly user-defined semaphore `sema` with it. Pass through `args` as
+ * USDT arguments. `args` are optional and zero arguments are acceptable.
+ *
+ * Semaphore is defined with the help of USDT_DEFINE_SEMA() macro and can be
+ * checked whether active with USDT_SEMA_IS_ACTIVE().
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema), ##__VA_ARGS__)
+#else
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema) __VA_OPT__(,) __VA_ARGS__)
+#endif
+
+/*
+ * Adjustable implementation aspects
+ */
+#ifndef USDT_ARG_CONSTRAINT
+#if defined __powerpc__
+#define USDT_ARG_CONSTRAINT nZr
+#elif defined __arm__
+#define USDT_ARG_CONSTRAINT g
+#elif defined __loongarch__
+#define USDT_ARG_CONSTRAINT nmr
+#else
+#define USDT_ARG_CONSTRAINT nor
+#endif
+#endif /* USDT_ARG_CONSTRAINT */
+
+#ifndef USDT_NOP
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define USDT_NOP nop 0
+#else
+#define USDT_NOP nop
+#endif
+#endif /* USDT_NOP */
+
+/*
+ * Implementation details
+ */
+/* USDT name for implicitly-defined USDT semaphore, derived from group:name */
+#define __usdt_sema_name(group, name) __usdt_sema_##group##__##name
+/* ELF section into which USDT semaphores are put */
+#define __usdt_sema_sec __attribute__((section(".probes")))
+
+#define __usdt_concat(a, b) a ## b
+#define __usdt_apply(fn, n) __usdt_concat(fn, n)
+
+#ifndef __usdt_nth
+#define __usdt_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N
+#endif
+
+#ifndef __usdt_narg
+#ifdef __usdt_va_opt
+#define __usdt_narg(...) __usdt_nth(_ __VA_OPT__(,) __VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#else
+#define __usdt_narg(...) __usdt_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+#endif /* __usdt_narg */
+
+#define __usdt_hash #
+#define __usdt_str_(x) #x
+#define __usdt_str(x) __usdt_str_(x)
+
+#ifndef __usdt_asm_name
+#define __usdt_asm_name(name) __asm__(__usdt_str(name))
+#endif
+
+#define __usdt_asm0() "\n"
+#define __usdt_asm1(x) __usdt_str(x) "\n"
+#define __usdt_asm2(x, ...) __usdt_str(x) "," __usdt_asm1(__VA_ARGS__)
+#define __usdt_asm3(x, ...) __usdt_str(x) "," __usdt_asm2(__VA_ARGS__)
+#define __usdt_asm4(x, ...) __usdt_str(x) "," __usdt_asm3(__VA_ARGS__)
+#define __usdt_asm5(x, ...) __usdt_str(x) "," __usdt_asm4(__VA_ARGS__)
+#define __usdt_asm6(x, ...) __usdt_str(x) "," __usdt_asm5(__VA_ARGS__)
+#define __usdt_asm7(x, ...) __usdt_str(x) "," __usdt_asm6(__VA_ARGS__)
+#define __usdt_asm8(x, ...) __usdt_str(x) "," __usdt_asm7(__VA_ARGS__)
+#define __usdt_asm9(x, ...) __usdt_str(x) "," __usdt_asm8(__VA_ARGS__)
+#define __usdt_asm10(x, ...) __usdt_str(x) "," __usdt_asm9(__VA_ARGS__)
+#define __usdt_asm11(x, ...) __usdt_str(x) "," __usdt_asm10(__VA_ARGS__)
+#define __usdt_asm12(x, ...) __usdt_str(x) "," __usdt_asm11(__VA_ARGS__)
+#define __usdt_asm(...) __usdt_apply(__usdt_asm, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#ifdef __LP64__
+#define __usdt_asm_addr .8byte
+#else
+#define __usdt_asm_addr .4byte
+#endif
+
+#define __usdt_asm_strz_(x) __usdt_asm1(.asciz #x)
+#define __usdt_asm_strz(x) __usdt_asm_strz_(x)
+#define __usdt_asm_str_(x) __usdt_asm1(.ascii #x)
+#define __usdt_asm_str(x) __usdt_asm_str_(x)
+
+/* "semaphoreless" USDT case */
+#ifndef __usdt_sema_none
+#define __usdt_sema_none(sema)
+#endif
+
+/* implicitly defined __usdt_sema__group__name semaphore (using weak symbols) */
+#ifndef __usdt_sema_implicit
+#define __usdt_sema_implicit(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm1(.ifndef sema) \
+ __usdt_asm3( .pushsection .probes, "aw", "progbits") \
+ __usdt_asm1( .weak sema) \
+ __usdt_asm1( .hidden sema) \
+ __usdt_asm1( .align 2) \
+ __usdt_asm1(sema:) \
+ __usdt_asm1( .zero 2) \
+ __usdt_asm2( .type sema, @object) \
+ __usdt_asm2( .size sema, 2) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ );
+#endif
+
+/* externally defined semaphore using USDT_DEFINE_SEMA() and passed explicitly by user */
+#ifndef __usdt_sema_explicit
+#define __usdt_sema_explicit(sema) \
+ __asm__ __volatile__ ("" :: "m" (sema));
+#endif
+
+/* main USDT definition (nop and .note.stapsdt metadata) */
+#define __usdt_probe(group, name, sema_def, sema, ...) do { \
+ sema_def(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm( 990: USDT_NOP) \
+ __usdt_asm3( .pushsection .note.stapsdt, "", "note") \
+ __usdt_asm1( .balign 4) \
+ __usdt_asm3( .4byte 992f-991f,994f-993f,3) \
+ __usdt_asm1(991: .asciz "stapsdt") \
+ __usdt_asm1(992: .balign 4) \
+ __usdt_asm1(993: __usdt_asm_addr 990b) \
+ __usdt_asm1( __usdt_asm_addr _.stapsdt.base) \
+ __usdt_asm1( __usdt_asm_addr sema) \
+ __usdt_asm_strz(group) \
+ __usdt_asm_strz(name) \
+ __usdt_asm_args(__VA_ARGS__) \
+ __usdt_asm1( .ascii "\0") \
+ __usdt_asm1(994: .balign 4) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.ifndef _.stapsdt.base) \
+ __usdt_asm5( .pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat)\
+ __usdt_asm1( .weak _.stapsdt.base) \
+ __usdt_asm1( .hidden _.stapsdt.base) \
+ __usdt_asm1(_.stapsdt.base:) \
+ __usdt_asm1( .space 1) \
+ __usdt_asm2( .size _.stapsdt.base, 1) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ :: __usdt_asm_ops(__VA_ARGS__) \
+ ); \
+} while (0)
+
+/*
+ * NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ * operand note format.
+ *
+ * The named register may be a longer or shorter (!) alias for the
+ * storage where the value in question is found. For example, on
+ * i386, 64-bit value may be put in register pairs, and a register
+ * name stored would identify just one of them. Previously, gcc was
+ * asked to emit the %w[id] (16-bit alias of some registers holding
+ * operands), even when a wider 32-bit value was used.
+ *
+ * Bottom line: the byte-width given before the @ sign governs. If
+ * there is a mismatch between that width and that of the named
+ * register, then a sys/sdt.h note consumer may need to employ
+ * architecture-specific heuristics to figure out where the compiler
+ * has actually put the complete value.
+ */
+#if defined(__powerpc__) || defined(__powerpc64__)
+#define __usdt_argref(id) %I[id]%[id]
+#elif defined(__i386__)
+#define __usdt_argref(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+#define __usdt_argref(id) %[id]
+#endif
+
+#define __usdt_asm_arg(n) __usdt_asm_str(%c[__usdt_asz##n]) \
+ __usdt_asm1(.ascii "@") \
+ __usdt_asm_str(__usdt_argref(__usdt_aval##n))
+
+#define __usdt_asm_args0 /* no arguments */
+#define __usdt_asm_args1 __usdt_asm_arg(1)
+#define __usdt_asm_args2 __usdt_asm_args1 __usdt_asm1(.ascii " ") __usdt_asm_arg(2)
+#define __usdt_asm_args3 __usdt_asm_args2 __usdt_asm1(.ascii " ") __usdt_asm_arg(3)
+#define __usdt_asm_args4 __usdt_asm_args3 __usdt_asm1(.ascii " ") __usdt_asm_arg(4)
+#define __usdt_asm_args5 __usdt_asm_args4 __usdt_asm1(.ascii " ") __usdt_asm_arg(5)
+#define __usdt_asm_args6 __usdt_asm_args5 __usdt_asm1(.ascii " ") __usdt_asm_arg(6)
+#define __usdt_asm_args7 __usdt_asm_args6 __usdt_asm1(.ascii " ") __usdt_asm_arg(7)
+#define __usdt_asm_args8 __usdt_asm_args7 __usdt_asm1(.ascii " ") __usdt_asm_arg(8)
+#define __usdt_asm_args9 __usdt_asm_args8 __usdt_asm1(.ascii " ") __usdt_asm_arg(9)
+#define __usdt_asm_args10 __usdt_asm_args9 __usdt_asm1(.ascii " ") __usdt_asm_arg(10)
+#define __usdt_asm_args11 __usdt_asm_args10 __usdt_asm1(.ascii " ") __usdt_asm_arg(11)
+#define __usdt_asm_args12 __usdt_asm_args11 __usdt_asm1(.ascii " ") __usdt_asm_arg(12)
+#define __usdt_asm_args(...) __usdt_apply(__usdt_asm_args, __usdt_narg(__VA_ARGS__))
+
+#define __usdt_is_arr(x) (__builtin_classify_type(x) == 14 || __builtin_classify_type(x) == 5)
+#define __usdt_arg_size(x) (__usdt_is_arr(x) ? sizeof(void *) : sizeof(x))
+
+/*
+ * We can't use __builtin_choose_expr() in C++, so fall back to table-based
+ * signedness determination for known types, utilizing templates magic.
+ */
+#ifdef __cplusplus
+
+#define __usdt_is_signed(x) (!__usdt_is_arr(x) && __usdt_t<__typeof(x)>::is_signed)
+
+#include <cstddef>
+
+template<typename T> struct __usdt_t { static const bool is_signed = false; };
+template<typename A> struct __usdt_t<A[]> : public __usdt_t<A *> {};
+template<typename A, size_t N> struct __usdt_t<A[N]> : public __usdt_t<A *> {};
+
+#define __usdt_def_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = true; }
+#define __usdt_maybe_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = (T)-1 < (T)1; }
+
+__usdt_def_signed(signed char);
+__usdt_def_signed(short);
+__usdt_def_signed(int);
+__usdt_def_signed(long);
+__usdt_def_signed(long long);
+__usdt_maybe_signed(char);
+__usdt_maybe_signed(wchar_t);
+
+#else /* !__cplusplus */
+
+#define __usdt_is_inttype(x) (__builtin_classify_type(x) >= 1 && __builtin_classify_type(x) <= 4)
+#define __usdt_inttype(x) __typeof(__builtin_choose_expr(__usdt_is_inttype(x), (x), 0U))
+#define __usdt_is_signed(x) ((__usdt_inttype(x))-1 < (__usdt_inttype(x))1)
+
+#endif /* __cplusplus */
+
+#define __usdt_asm_op(n, x) \
+ [__usdt_asz##n] "n" ((__usdt_is_signed(x) ? (int)-1 : 1) * (int)__usdt_arg_size(x)), \
+ [__usdt_aval##n] __usdt_str(USDT_ARG_CONSTRAINT)(x)
+
+#define __usdt_asm_ops0() [__usdt_dummy] "g" (0)
+#define __usdt_asm_ops1(x) __usdt_asm_op(1, x)
+#define __usdt_asm_ops2(a,x) __usdt_asm_ops1(a), __usdt_asm_op(2, x)
+#define __usdt_asm_ops3(a,b,x) __usdt_asm_ops2(a,b), __usdt_asm_op(3, x)
+#define __usdt_asm_ops4(a,b,c,x) __usdt_asm_ops3(a,b,c), __usdt_asm_op(4, x)
+#define __usdt_asm_ops5(a,b,c,d,x) __usdt_asm_ops4(a,b,c,d), __usdt_asm_op(5, x)
+#define __usdt_asm_ops6(a,b,c,d,e,x) __usdt_asm_ops5(a,b,c,d,e), __usdt_asm_op(6, x)
+#define __usdt_asm_ops7(a,b,c,d,e,f,x) __usdt_asm_ops6(a,b,c,d,e,f), __usdt_asm_op(7, x)
+#define __usdt_asm_ops8(a,b,c,d,e,f,g,x) __usdt_asm_ops7(a,b,c,d,e,f,g), __usdt_asm_op(8, x)
+#define __usdt_asm_ops9(a,b,c,d,e,f,g,h,x) __usdt_asm_ops8(a,b,c,d,e,f,g,h), __usdt_asm_op(9, x)
+#define __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,x) __usdt_asm_ops9(a,b,c,d,e,f,g,h,i), __usdt_asm_op(10, x)
+#define __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,x) __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,j), __usdt_asm_op(11, x)
+#define __usdt_asm_ops12(a,b,c,d,e,f,g,h,i,j,k,x) __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,k), __usdt_asm_op(12, x)
+#define __usdt_asm_ops(...) __usdt_apply(__usdt_asm_ops, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#endif /* __USDT_H */
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index b616575c3b00..ce13002c7a19 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -93,7 +93,7 @@
.expected_attach_type = BPF_SK_LOOKUP,
.result = VERBOSE_ACCEPT,
.runs = -1,
- .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+ .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8=-44\
2: (c5) if r0 s< 0x0 goto pc+2\
- R0_w=-44",
+ R0=-44",
},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index f3492efc8834..c8d640802cce 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1375,7 +1375,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
@@ -1439,7 +1439,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -1493,7 +1493,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -2380,7 +2380,7 @@
*/
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
- /* r9 = *r9 ; verifier get's to this point via two paths:
+ /* r9 = *r9 ; verifier gets to this point via two paths:
* ; (I) one including r9 = r8, verified first;
* ; (II) one excluding r9 = r8, verified next.
* ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
index f2cac42298ba..09179fb551f0 100755
--- a/tools/testing/selftests/bpf/verify_sig_setup.sh
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -32,7 +32,7 @@ usage()
exit 1
}
-setup()
+genkey()
{
local tmp_dir="$1"
@@ -45,9 +45,14 @@ setup()
openssl x509 -in ${tmp_dir}/signing_key.pem -out \
${tmp_dir}/signing_key.der -outform der
+}
- key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
+setup()
+{
+ local tmp_dir="$1"
+ genkey "${tmp_dir}"
+ key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
keyring_id=$(keyctl newring ebpf_testing_keyring @s)
keyctl link $key_id $keyring_id
}
@@ -105,6 +110,8 @@ main()
if [[ "${action}" == "setup" ]]; then
setup "${tmp_dir}"
+ elif [[ "${action}" == "genkey" ]]; then
+ genkey "${tmp_dir}"
elif [[ "${action}" == "cleanup" ]]; then
cleanup "${tmp_dir}"
elif [[ "${action}" == "fsverity-create-sign" ]]; then
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index d532dd82a3a8..e962f133250c 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -181,6 +181,12 @@ struct var_preset {
bool applied;
};
+enum dump_mode {
+ DUMP_NONE = 0,
+ DUMP_XLATED = 1,
+ DUMP_JITED = 2,
+};
+
static struct env {
char **filenames;
int filename_cnt;
@@ -227,6 +233,7 @@ static struct env {
char orig_cgroup[PATH_MAX];
char stat_cgroup[PATH_MAX];
int memory_peak_fd;
+ __u32 dump_mode;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -271,6 +278,7 @@ const char argp_program_doc[] =
enum {
OPT_LOG_FIXED = 1000,
OPT_LOG_SIZE = 1001,
+ OPT_DUMP = 1002,
};
static const struct argp_option opts[] = {
@@ -295,6 +303,7 @@ static const struct argp_option opts[] = {
"Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
{ "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" },
+ { "dump", OPT_DUMP, "DUMP_MODE", OPTION_ARG_OPTIONAL, "Print BPF program dump (xlated, jited)" },
{},
};
@@ -427,6 +436,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return err;
}
break;
+ case OPT_DUMP:
+ if (!arg || strcasecmp(arg, "xlated") == 0) {
+ env.dump_mode |= DUMP_XLATED;
+ } else if (strcasecmp(arg, "jited") == 0) {
+ env.dump_mode |= DUMP_JITED;
+ } else {
+ fprintf(stderr, "Unrecognized dump mode '%s'\n", arg);
+ return -EINVAL;
+ }
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -1554,6 +1573,36 @@ static int parse_rvalue(const char *val, struct rvalue *rvalue)
return 0;
}
+static void dump(__u32 prog_id, enum dump_mode mode, const char *file_name, const char *prog_name)
+{
+ char command[64], buf[4096];
+ FILE *fp;
+ int status;
+
+ status = system("command -v bpftool > /dev/null 2>&1");
+ if (status != 0) {
+ fprintf(stderr, "bpftool is not available, can't print program dump\n");
+ return;
+ }
+ snprintf(command, sizeof(command), "bpftool prog dump %s id %u",
+ mode == DUMP_JITED ? "jited" : "xlated", prog_id);
+ fp = popen(command, "r");
+ if (!fp) {
+ fprintf(stderr, "bpftool failed with error: %d\n", errno);
+ return;
+ }
+
+ printf("DUMP (%s) %s/%s:\n", mode == DUMP_JITED ? "JITED" : "XLATED", file_name, prog_name);
+ while (fgets(buf, sizeof(buf), fp))
+ fputs(buf, stdout);
+ fprintf(stdout, "\n");
+
+ if (ferror(fp))
+ fprintf(stderr, "Failed to dump BPF prog with error: %d\n", errno);
+
+ pclose(fp);
+}
+
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
{
const char *base_filename = basename(strdupa(filename));
@@ -1630,8 +1679,13 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
memset(&info, 0, info_len);
fd = bpf_program__fd(prog);
- if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0)
+ if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) {
stats->stats[JITED_SIZE] = info.jited_prog_len;
+ if (env.dump_mode & DUMP_JITED)
+ dump(info.id, DUMP_JITED, base_filename, prog_name);
+ if (env.dump_mode & DUMP_XLATED)
+ dump(info.id, DUMP_XLATED, base_filename, prog_name);
+ }
parse_verif_log(buf, buf_sz, stats);
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 1503a1d2faa0..9ed8c796645d 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -155,7 +155,7 @@ int main(int argc, char **argv)
}
if (!server) {
- /* Only supports IPv4; see hints initiailization above. */
+ /* Only supports IPv4; see hints initialization above. */
if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
fprintf(stderr, "Could not resolve %s\n", argv[optind]);
return 1;
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 93c2cc413cfc..48729da142c2 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -93,8 +93,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
/* Refresh the local tail pointer.
* cached_cons is r->size bigger than the real consumer pointer so
* that this addition can be avoided in the more frequently
- * executed code that computs free_entries in the beginning of
- * this function. Without this optimization it whould have been
+ * executed code that computes free_entries in the beginning of
+ * this function. Without this optimization it would have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index a29de0713f19..352adc8df2d1 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -2276,25 +2276,13 @@ static int testapp_xdp_metadata_copy(struct test_spec *test)
{
struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
- struct bpf_map *data_map;
- int count = 0;
- int key = 0;
test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
skel_tx->progs.xsk_xdp_populate_metadata,
skel_rx->maps.xsk, skel_tx->maps.xsk);
test->ifobj_rx->use_metadata = true;
- data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
- if (!data_map || !bpf_map__is_internal(data_map)) {
- ksft_print_msg("Error: could not find bss section of XDP program\n");
- return TEST_FAILURE;
- }
-
- if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) {
- ksft_print_msg("Error: could not update count element\n");
- return TEST_FAILURE;
- }
+ skel_rx->bss->count = 0;
return testapp_validate_traffic(test);
}
diff --git a/tools/testing/selftests/cgroup/lib/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 0e89fcff4d05..44c52f620fda 100644
--- a/tools/testing/selftests/cgroup/lib/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -522,6 +522,18 @@ int proc_mount_contains(const char *option)
return strstr(buf, option) != NULL;
}
+int cgroup_feature(const char *feature)
+{
+ char buf[PAGE_SIZE];
+ ssize_t read;
+
+ read = read_text("/sys/kernel/cgroup/features", buf, sizeof(buf));
+ if (read < 0)
+ return read;
+
+ return strstr(buf, feature) != NULL;
+}
+
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index c69cab66254b..9dc90a1b386d 100644
--- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -60,6 +60,7 @@ extern int cg_run_nowait(const char *cgroup,
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
int proc_mount_contains(const char *option);
+int cgroup_feature(const char *feature);
extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
extern pid_t clone_into_cgroup(int cgroup_fd);
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 8730645d363a..dfb763819581 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -804,6 +804,662 @@ cleanup:
return ret;
}
+/*
+ * Get the current frozen_usec for the cgroup.
+ */
+static long cg_check_freezetime(const char *cgroup)
+{
+ return cg_read_key_long(cgroup, "cgroup.stat.local",
+ "frozen_usec ");
+}
+
+/*
+ * Test that the freeze time will behave as expected for an empty cgroup.
+ */
+static int test_cgfreezer_time_empty(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+
+ cgroup = cg_name(root, "cg_time_test_empty");
+ if (!cgroup)
+ goto cleanup;
+
+ /*
+ * 1) Create an empty cgroup and check that its freeze time
+ * is 0.
+ */
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+
+ /*
+ * 2) Sleep for 1000 us. Check that the freeze time is at
+ * least 1000 us.
+ */
+ usleep(1000);
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 1000) {
+ debug("Expect time (%ld) to be at least 1000 us\n",
+ curr);
+ goto cleanup;
+ }
+
+ /*
+ * 3) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 2).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Check the freeze time again to ensure that it has not
+ * changed.
+ */
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * A simple test for cgroup freezer time accounting. This test follows
+ * the same flow as test_cgfreezer_time_empty, but with a single process
+ * in the cgroup.
+ */
+static int test_cgfreezer_time_simple(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+
+ cgroup = cg_name(root, "cg_time_test_simple");
+ if (!cgroup)
+ goto cleanup;
+
+ /*
+ * 1) Create a cgroup and check that its freeze time is 0.
+ */
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 2) Populate the cgroup with one child and check that the
+ * freeze time is still 0.
+ */
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr > prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+
+ /*
+ * 3) Sleep for 1000 us. Check that the freeze time is at
+ * least 1000 us.
+ */
+ usleep(1000);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 1000) {
+ debug("Expect time (%ld) to be at least 1000 us\n",
+ curr);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 3).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 5) Sleep for 1000 us. Check that the freeze time is the
+ * same as at 4).
+ */
+ usleep(1000);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * Test that freezer time accounting works as expected, even while we're
+ * populating a cgroup with processes.
+ */
+static int test_cgfreezer_time_populate(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ long prev, curr;
+ int i;
+
+ cgroup = cg_name(root, "cg_time_test_populate");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ curr = cg_check_freezetime(cgroup);
+ if (curr < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+ if (curr > 0) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 1) Populate the cgroup with 100 processes. Check that
+ * the freeze time is 0.
+ */
+ for (i = 0; i < 100; i++)
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 2) Wait for the group to become fully populated. Check
+ * that the freeze time is 0.
+ */
+ if (cg_wait_for_proc_count(cgroup, 100))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be 0\n", curr);
+ goto cleanup;
+ }
+
+ /*
+ * 3) Freeze the cgroup and then populate it with 100 more
+ * processes. Check that the freeze time continues to grow.
+ */
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ for (i = 0; i < 100; i++)
+ cg_run_nowait(cgroup, child_fn, NULL);
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 4) Wait for the group to become fully populated. Check
+ * that the freeze time is larger than at 3).
+ */
+ if (cg_wait_for_proc_count(cgroup, 200))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 5) Unfreeze the cgroup. Check that the freeze time is
+ * larger than at 4).
+ */
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 6) Kill the processes. Check that the freeze time is the
+ * same as it was at 5).
+ */
+ if (cg_killall(cgroup))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr != prev) {
+ debug("Expect time (%ld) to be unchanged from previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ /*
+ * 7) Freeze and unfreeze the cgroup. Check that the freeze
+ * time is larger than it was at 6).
+ */
+ if (cg_freeze_nowait(cgroup, true))
+ goto cleanup;
+ if (cg_freeze_nowait(cgroup, false))
+ goto cleanup;
+ prev = curr;
+ curr = cg_check_freezetime(cgroup);
+ if (curr <= prev) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr, prev);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * Test that frozen time for a cgroup continues to work as expected,
+ * even as processes are migrated. Frozen cgroup A's freeze time should
+ * continue to increase and running cgroup B's should stay 0.
+ */
+static int test_cgfreezer_time_migrate(const char *root)
+{
+ long prev_A, curr_A, curr_B;
+ char *cgroup[2] = {0};
+ int ret = KSFT_FAIL;
+ int pid;
+
+ cgroup[0] = cg_name(root, "cg_time_test_migrate_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(root, "cg_time_test_migrate_B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ if (cg_create(cgroup[0]))
+ goto cleanup;
+
+ if (cg_check_freezetime(cgroup[0]) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(cgroup[1]))
+ goto cleanup;
+
+ pid = cg_run_nowait(cgroup[0], child_fn, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_wait_for_proc_count(cgroup[0], 1))
+ goto cleanup;
+
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A) {
+ debug("Expect time (%ld) to be 0\n", curr_A);
+ goto cleanup;
+ }
+ curr_B = cg_check_freezetime(cgroup[1]);
+ if (curr_B) {
+ debug("Expect time (%ld) to be 0\n", curr_B);
+ goto cleanup;
+ }
+
+ /*
+ * Freeze cgroup A.
+ */
+ if (cg_freeze_wait(cgroup[0], true))
+ goto cleanup;
+ prev_A = curr_A;
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A <= prev_A) {
+ debug("Expect time (%ld) to be > 0\n", curr_A);
+ goto cleanup;
+ }
+
+ /*
+ * Migrate from A (frozen) to B (running).
+ */
+ if (cg_enter(cgroup[1], pid))
+ goto cleanup;
+
+ usleep(1000);
+ curr_B = cg_check_freezetime(cgroup[1]);
+ if (curr_B) {
+ debug("Expect time (%ld) to be 0\n", curr_B);
+ goto cleanup;
+ }
+
+ prev_A = curr_A;
+ curr_A = cg_check_freezetime(cgroup[0]);
+ if (curr_A <= prev_A) {
+ debug("Expect time (%ld) to be more than previous check (%ld)\n",
+ curr_A, prev_A);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cgroup[0])
+ cg_destroy(cgroup[0]);
+ free(cgroup[0]);
+ if (cgroup[1])
+ cg_destroy(cgroup[1]);
+ free(cgroup[1]);
+ return ret;
+}
+
+/*
+ * The test creates a cgroup and freezes it. Then it creates a child cgroup.
+ * After that it checks that the child cgroup has a non-zero freeze time
+ * that is less than the parent's. Next, it freezes the child, unfreezes
+ * the parent, and sleeps. Finally, it checks that the child's freeze
+ * time has grown larger than the parent's.
+ */
+static int test_cgfreezer_time_parent(const char *root)
+{
+ char *parent, *child = NULL;
+ int ret = KSFT_FAIL;
+ long ptime, ctime;
+
+ parent = cg_name(root, "cg_test_parent_A");
+ if (!parent)
+ goto cleanup;
+
+ child = cg_name(parent, "cg_test_parent_B");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_check_freezetime(parent) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_freeze_wait(parent, true))
+ goto cleanup;
+
+ usleep(1000);
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_check_frozen(child, true))
+ goto cleanup;
+
+ /*
+ * Since the parent was frozen the entire time the child cgroup
+ * was being created, we expect the parent's freeze time to be
+ * larger than the child's.
+ *
+ * Ideally, we would be able to check both times simultaneously,
+ * but here we get the child's after we get the parent's.
+ */
+ ptime = cg_check_freezetime(parent);
+ ctime = cg_check_freezetime(child);
+ if (ptime <= ctime) {
+ debug("Expect ptime (%ld) > ctime (%ld)\n", ptime, ctime);
+ goto cleanup;
+ }
+
+ if (cg_freeze_nowait(child, true))
+ goto cleanup;
+
+ if (cg_freeze_wait(parent, false))
+ goto cleanup;
+
+ if (cg_check_frozen(child, true))
+ goto cleanup;
+
+ usleep(100000);
+
+ ctime = cg_check_freezetime(child);
+ ptime = cg_check_freezetime(parent);
+
+ if (ctime <= ptime) {
+ debug("Expect ctime (%ld) > ptime (%ld)\n", ctime, ptime);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ free(child);
+ if (parent)
+ cg_destroy(parent);
+ free(parent);
+ return ret;
+}
+
+/*
+ * The test creates a parent cgroup and a child cgroup. Then, it freezes
+ * the child and checks that the child's freeze time is greater than the
+ * parent's, which should be zero.
+ */
+static int test_cgfreezer_time_child(const char *root)
+{
+ char *parent, *child = NULL;
+ int ret = KSFT_FAIL;
+ long ptime, ctime;
+
+ parent = cg_name(root, "cg_test_child_A");
+ if (!parent)
+ goto cleanup;
+
+ child = cg_name(parent, "cg_test_child_B");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_check_freezetime(parent) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_freeze_wait(child, true))
+ goto cleanup;
+
+ ctime = cg_check_freezetime(child);
+ ptime = cg_check_freezetime(parent);
+ if (ptime != 0) {
+ debug("Expect ptime (%ld) to be 0\n", ptime);
+ goto cleanup;
+ }
+
+ if (ctime <= ptime) {
+ debug("Expect ctime (%ld) <= ptime (%ld)\n", ctime, ptime);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ free(child);
+ if (parent)
+ cg_destroy(parent);
+ free(parent);
+ return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ * A
+ * |
+ * B
+ * |
+ * C
+ *
+ * Then it freezes the cgroups in the order C, B, A.
+ * Then it unfreezes the cgroups in the order A, B, C.
+ * Then it checks that C's freeze time is larger than B's and
+ * that B's is larger than A's.
+ */
+static int test_cgfreezer_time_nested(const char *root)
+{
+ char *cgroup[3] = {0};
+ int ret = KSFT_FAIL;
+ long time[3] = {0};
+ int i;
+
+ cgroup[0] = cg_name(root, "cg_test_time_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(cgroup[0], "B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ cgroup[2] = cg_name(cgroup[1], "C");
+ if (!cgroup[2])
+ goto cleanup;
+
+ if (cg_create(cgroup[0]))
+ goto cleanup;
+
+ if (cg_check_freezetime(cgroup[0]) < 0) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_create(cgroup[1]))
+ goto cleanup;
+
+ if (cg_create(cgroup[2]))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[2], true))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[1], true))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[0], true))
+ goto cleanup;
+
+ usleep(1000);
+
+ if (cg_freeze_nowait(cgroup[0], false))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[1], false))
+ goto cleanup;
+
+ if (cg_freeze_nowait(cgroup[2], false))
+ goto cleanup;
+
+ time[2] = cg_check_freezetime(cgroup[2]);
+ time[1] = cg_check_freezetime(cgroup[1]);
+ time[0] = cg_check_freezetime(cgroup[0]);
+
+ if (time[2] <= time[1]) {
+ debug("Expect C's time (%ld) > B's time (%ld)", time[2], time[1]);
+ goto cleanup;
+ }
+
+ if (time[1] <= time[0]) {
+ debug("Expect B's time (%ld) > A's time (%ld)", time[1], time[0]);
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 2; i >= 0 && cgroup[i]; i--) {
+ cg_destroy(cgroup[i]);
+ free(cgroup[i]);
+ }
+
+ return ret;
+}
+
#define T(x) { x, #x }
struct cgfreezer_test {
int (*fn)(const char *root);
@@ -819,6 +1475,13 @@ struct cgfreezer_test {
T(test_cgfreezer_stopped),
T(test_cgfreezer_ptraced),
T(test_cgfreezer_vfork),
+ T(test_cgfreezer_time_empty),
+ T(test_cgfreezer_time_simple),
+ T(test_cgfreezer_time_populate),
+ T(test_cgfreezer_time_migrate),
+ T(test_cgfreezer_time_parent),
+ T(test_cgfreezer_time_child),
+ T(test_cgfreezer_time_nested),
};
#undef T
diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c
index 9ecb83c6cc5c..d8a1d1cd5007 100644
--- a/tools/testing/selftests/cgroup/test_pids.c
+++ b/tools/testing/selftests/cgroup/test_pids.c
@@ -77,6 +77,9 @@ static int test_pids_events(const char *root)
char *cg_parent = NULL, *cg_child = NULL;
int pid;
+ if (cgroup_feature("pids_localevents") <= 0)
+ return KSFT_SKIP;
+
cg_parent = cg_name(root, "pids_parent");
cg_child = cg_name(cg_parent, "pids_child");
if (!cg_parent || !cg_child)
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
index 5a5a7a5f7e1d..a4ac80bb1003 100644
--- a/tools/testing/selftests/coredump/stackdump_test.c
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client)
if (info.coredump_mask & PIDFD_COREDUMPED)
goto out;
- if (read(fd_coredump, &c, 1) < 1)
- goto out;
-
exit_code = EXIT_SUCCESS;
out:
if (fd_peer_pidfd >= 0)
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 5b230deb19e8..9a3499827d4b 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -4,6 +4,7 @@
TEST_GEN_FILES += access_memory access_memory_even
TEST_FILES = _damon_sysfs.py
+TEST_FILES += drgn_dump_damon_status.py
# functionality tests
TEST_PROGS += sysfs.sh
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 2b10854e4b1e..44b98f17f8ff 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -10,7 +10,8 @@ TEST_PROGS := \
mode-2-recovery-updelay.sh \
bond_options.sh \
bond-eth-type-change.sh \
- bond_macvlan_ipvlan.sh
+ bond_macvlan_ipvlan.sh \
+ bond_passive_lacp.sh
TEST_FILES := \
lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 7bc148889ca7..187b478d0ddf 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -7,6 +7,8 @@ ALL_TESTS="
prio
arp_validate
num_grat_arp
+ fail_over_mac
+ vlan_over_bond
"
lib_dir=$(dirname "$0")
@@ -352,8 +354,8 @@ garp_test()
exp_num=$(echo "${param}" | cut -f6 -d ' ')
active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
- slowwait_for_counter $((exp_num + 5)) $exp_num \
- tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}"
+ slowwait_for_counter $((exp_num + 5)) $exp_num tc_rule_handle_stats_get \
+ "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" &> /dev/null
# check result
real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
@@ -376,6 +378,197 @@ num_grat_arp()
done
}
+check_all_mac_same()
+{
+ RET=0
+ # all slaves should have same mac address (with the first port's mac)
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ local eth1_mac=$(ip -n "$s_ns" -j link show eth1 | jq -r '.[]["address"]')
+ local eth2_mac=$(ip -n "$s_ns" -j link show eth2 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ] || [ "$eth0_mac" != "$bond_mac" ] || \
+ [ "$eth1_mac" != "$bond_mac" ] || [ "$eth2_mac" != "$bond_mac" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_first()
+{
+ RET=0
+ # bond mac address should be same with the first added slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "${mac[0]}" ]; then
+ RET=1
+ fi
+}
+
+check_bond_mac_same_with_active()
+{
+ RET=0
+ # bond mac address should be same with active slave
+ local bond_mac=$(ip -n "$s_ns" -j link show bond0 | jq -r '.[]["address"]')
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ local active_slave_mac=$(ip -n "$s_ns" -j link show "$active_slave" | jq -r '.[]["address"]')
+ if [ "$bond_mac" != "$active_slave_mac" ]; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_not_change()
+{
+ RET=0
+ # backup slave's mac address is not changed
+ if ip -n "$s_ns" -d -j link show type bond_slave | jq -e '.[]
+ | select(.linkinfo.info_slave_data.state=="BACKUP")
+ | select(.address != .linkinfo.info_slave_data.perm_hwaddr)' &> /dev/null; then
+ RET=1
+ fi
+}
+
+check_backup_slave_mac_inherit()
+{
+ local backup_mac
+ RET=0
+
+ # backup slaves should use mac[1] or mac[2]
+ local backup_macs=$(ip -n "$s_ns" -d -j link show type bond_slave | \
+ jq -r '.[] | select(.linkinfo.info_slave_data.state=="BACKUP") | .address')
+ for backup_mac in $backup_macs; do
+ if [ "$backup_mac" != "${mac[1]}" ] && [ "$backup_mac" != "${mac[2]}" ]; then
+ RET=1
+ fi
+ done
+}
+
+check_first_slave_random_mac()
+{
+ RET=0
+ # remove the first added slave and added it back
+ ip -n "$s_ns" link set eth0 nomaster
+ ip -n "$s_ns" link set eth0 master bond0
+
+ # the first slave should use random mac address
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" = "${mac[0]}" ] && RET=1
+ log_test "bond fail_over_mac follow" "random first slave mac"
+
+ # remove the first slave, the permanent MAC address should be restored back
+ ip -n "$s_ns" link set eth0 nomaster
+ eth0_mac=$(ip -n "$s_ns" -j link show eth0 | jq -r '.[]["address"]')
+ [ "$eth0_mac" != "${mac[0]}" ] && RET=1
+}
+
+do_active_backup_failover()
+{
+ local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ ip -n ${s_ns} link set ${active_slave} down
+ slowwait 2 active_slave_changed $active_slave
+ ip -n ${s_ns} link set ${active_slave} up
+}
+
+fail_over_mac()
+{
+ # Bring down the first interface on the switch to force the bond to
+ # select another active interface instead of the first one that joined.
+ ip -n "$g_ns" link set s0 down
+
+ # fail_over_mac none
+ bond_reset "mode active-backup miimon 100 fail_over_mac 0"
+ check_all_mac_same
+ log_test "fail_over_mac 0" "all slaves have same mac"
+ do_active_backup_failover
+ check_all_mac_same
+ log_test "fail_over_mac 0" "failover: all slaves have same mac"
+
+ # fail_over_mac active
+ bond_reset "mode active-backup miimon 100 fail_over_mac 1"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "backup slave mac is not changed"
+ do_active_backup_failover
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 1" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_not_change
+ log_test "fail_over_mac 1" "failover: backup slave mac is not changed"
+
+ # fail_over_mac follow
+ bond_reset "mode active-backup miimon 100 fail_over_mac 2"
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "backup slave mac inherit"
+ do_active_backup_failover
+ check_bond_mac_same_with_first
+ log_test "fail_over_mac 2" "failover: bond mac is same with first slave mac"
+ check_bond_mac_same_with_active
+ log_test "fail_over_mac 2" "failover: bond mac is same with active slave mac"
+ check_backup_slave_mac_inherit
+ log_test "fail_over_mac 2" "failover: backup slave mac inherit"
+ check_first_slave_random_mac
+ log_test "fail_over_mac 2" "first slave mac random"
+}
+
+vlan_over_bond_arp()
+{
+ local mode="$1"
+ RET=0
+
+ bond_reset "mode $mode arp_interval 100 arp_ip_target 192.0.3.10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 101 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond arp" "$mode"
+}
+
+vlan_over_bond_ns()
+{
+ local mode="$1"
+ RET=0
+
+ if skip_ns; then
+ log_test_skip "vlan_over_bond ns" "$mode"
+ return 0
+ fi
+
+ bond_reset "mode $mode arp_interval 100 ns_ip6_target 2001:db8::3:10"
+ ip -n "${s_ns}" link add bond0.3 link bond0 type vlan id 3
+ ip -n "${s_ns}" link set bond0.3 up
+ ip -n "${s_ns}" addr add 192.0.3.1/24 dev bond0.3
+ ip -n "${s_ns}" addr add 2001:db8::3:1/64 dev bond0.3
+
+ slowwait_for_counter 5 5 tc_rule_handle_stats_get \
+ "dev eth0.3 ingress" 102 ".packets" "-n ${c_ns}" &> /dev/null || RET=1
+ log_test "vlan over bond ns" "$mode"
+}
+
+vlan_over_bond()
+{
+ # add vlan 3 for client
+ ip -n "${c_ns}" link add eth0.3 link eth0 type vlan id 3
+ ip -n "${c_ns}" link set eth0.3 up
+ ip -n "${c_ns}" addr add 192.0.3.10/24 dev eth0.3
+ ip -n "${c_ns}" addr add 2001:db8::3:10/64 dev eth0.3
+
+ # Add tc rule to check the vlan pkts
+ tc -n "${c_ns}" qdisc add dev eth0.3 clsact
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol arp \
+ handle 101 flower skip_hw arp_op request \
+ arp_sip 192.0.3.1 arp_tip 192.0.3.10 action pass
+ tc -n "${c_ns}" filter add dev eth0.3 ingress protocol ipv6 \
+ handle 102 flower skip_hw ip_proto icmpv6 \
+ type 135 src_ip 2001:db8::3:1 action pass
+
+ vlan_over_bond_arp "active-backup"
+ vlan_over_bond_ns "active-backup"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
new file mode 100755
index 000000000000..9c3b089813df
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test if a bond interface works with lacp_active=off.
+
+# shellcheck disable=SC2034
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+# shellcheck disable=SC1091
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+# shellcheck disable=SC2317
+check_port_state()
+{
+ local netns=$1
+ local port=$2
+ local state=$3
+
+ ip -n "${netns}" -d -j link show "$port" | \
+ jq -e ".[].linkinfo.info_slave_data.ad_actor_oper_port_state_str | index(\"${state}\") != null" > /dev/null
+}
+
+check_pkt_count()
+{
+ RET=0
+ local ns="$1"
+ local iface="$2"
+
+ # wait 65s, one per 30s
+ slowwait_for_counter 65 2 tc_rule_handle_stats_get \
+ "dev ${iface} egress" 101 ".packets" "-n ${ns}" &> /dev/null
+}
+
+setup() {
+ setup_ns c_ns s_ns
+
+ # shellcheck disable=SC2154
+ ip -n "${c_ns}" link add eth0 type veth peer name eth0 netns "${s_ns}"
+ ip -n "${c_ns}" link add eth1 type veth peer name eth1 netns "${s_ns}"
+
+ # Add tc filter to count the pkts
+ tc -n "${c_ns}" qdisc add dev eth0 clsact
+ tc -n "${c_ns}" filter add dev eth0 egress handle 101 protocol 0x8809 matchall action pass
+ tc -n "${s_ns}" qdisc add dev eth1 clsact
+ tc -n "${s_ns}" filter add dev eth1 egress handle 101 protocol 0x8809 matchall action pass
+
+ ip -n "${s_ns}" link add bond0 type bond mode 802.3ad lacp_active on lacp_rate fast
+ ip -n "${s_ns}" link set eth0 master bond0
+ ip -n "${s_ns}" link set eth1 master bond0
+
+ ip -n "${c_ns}" link add bond0 type bond mode 802.3ad lacp_active off lacp_rate fast
+ ip -n "${c_ns}" link set eth0 master bond0
+ ip -n "${c_ns}" link set eth1 master bond0
+
+}
+
+trap cleanup_all_ns EXIT
+setup
+
+# The bond will send 2 lacpdu pkts during init time, let's wait at least 2s
+# after interface up
+ip -n "${c_ns}" link set bond0 up
+sleep 2
+
+# 1. The passive side shouldn't send LACPDU.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "802.3ad lacp_active off" "init port"
+
+ip -n "${s_ns}" link set bond0 up
+# 2. The passive side should not have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${c_ns}" "eth0" "active" && RET=1
+log_test "802.3ad lacp_active off" "port state active"
+
+# 3. The active side should have the 'active' flag.
+RET=0
+slowwait 2 check_port_state "${s_ns}" "eth0" "active" || RET=1
+log_test "802.3ad lacp_active on" "port state active"
+
+# 4. Make sure the connection is not expired.
+RET=0
+slowwait 5 check_port_state "${s_ns}" "eth0" "distributing"
+slowwait 10 check_port_state "${s_ns}" "eth0" "expired" && RET=1
+log_test "bond 802.3ad lacp_active off" "port connection"
+
+# After testing, disconnect one port on each side to check the state.
+ip -n "${s_ns}" link set eth0 nomaster
+ip -n "${s_ns}" link set eth0 up
+ip -n "${c_ns}" link set eth1 nomaster
+ip -n "${c_ns}" link set eth1 up
+# Due to Periodic Machine and Rx Machine state change, the bond will still
+# send lacpdu pkts in a few seconds. sleep at lease 5s to make sure
+# negotiation finished
+sleep 5
+
+# 5. The active side should keep sending LACPDU.
+check_pkt_count "${s_ns}" "eth1" || RET=1
+log_test "bond 802.3ad lacp_active on" "port pkt after disconnect"
+
+# 6. The passive side shouldn't send LACPDU anymore.
+check_pkt_count "${c_ns}" "eth0" && RET=1
+log_test "bond 802.3ad lacp_active off" "port pkt after disconnect"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
index 195ef83cfbf1..167aa4a4a12a 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -39,6 +39,8 @@ g_ip4="192.0.2.254"
s_ip6="2001:db8::1"
c_ip6="2001:db8::10"
g_ip6="2001:db8::254"
+mac[0]="00:0a:0b:0c:0d:01"
+mac[1]="00:0a:0b:0c:0d:02"
gateway_create()
{
@@ -62,6 +64,7 @@ server_create()
for i in $(seq 0 1); do
ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
ip -n ${g_ns} link set s${i} up
ip -n ${g_ns} link set s${i} master br0
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
index 3a1333d9a85b..23a2932301cc 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -26,6 +26,7 @@
# +-------------------------------------+
source bond_topo_2d1c.sh
+mac[2]="00:0a:0b:0c:0d:03"
setup_prepare()
{
@@ -36,6 +37,7 @@ setup_prepare()
# Add the extra device as we use 3 down links for bond0
local i=2
ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n "${s_ns}" link set "eth${i}" addr "${mac[$i]}"
ip -n ${g_ns} link set s${i} up
ip -n ${g_ns} link set s${i} master br0
ip -n ${s_ns} link set eth${i} master bond0
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index dad4e5fda4db..832fa1caeb66 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -6,6 +6,8 @@ CONFIG_MACVLAN=y
CONFIG_IPVLAN=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_SCH_INGRESS=y
CONFIG_NLMON=y
CONFIG_VETH=y
+CONFIG_VLAN_8021Q=m
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
index cd23af875317..3e3a89a34afe 100755
--- a/tools/testing/selftests/drivers/net/hw/csum.py
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -17,7 +17,7 @@ def test_receive(cfg, ipver="6", extra_args=None):
ip_args = f"-{ipver} -S {cfg.remote_addr_v[ipver]} -D {cfg.addr_v[ipver]}"
rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
- tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+ tx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
with bkg(rx_cmd, exit_wait=True):
wait_port_listen(34000, proto="udp")
@@ -37,7 +37,7 @@ def test_transmit(cfg, ipver="6", extra_args=None):
if extra_args != "-U -Z":
extra_args += " -r 1"
- rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+ rx_cmd = f"{cfg.bin_remote} -i {cfg.remote_ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py
index b2698db39817..9699a100a87d 100755
--- a/tools/testing/selftests/drivers/net/napi_threaded.py
+++ b/tools/testing/selftests/drivers/net/napi_threaded.py
@@ -35,6 +35,8 @@ def _setup_deferred_cleanup(cfg) -> None:
threaded = cmd(f"cat /sys/class/net/{cfg.ifname}/threaded").stdout
defer(_set_threaded_state, cfg, threaded)
+ return combined
+
def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
"""
@@ -49,7 +51,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
napi0_id = napis[0]['id']
napi1_id = napis[1]['id']
- _setup_deferred_cleanup(cfg)
+ qcnt = _setup_deferred_cleanup(cfg)
# set threaded
_set_threaded_state(cfg, 1)
@@ -62,7 +64,7 @@ def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None:
nl.napi_set({'id': napi1_id, 'threaded': 'disabled'})
cmd(f"ethtool -L {cfg.ifname} combined 1")
- cmd(f"ethtool -L {cfg.ifname} combined 2")
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
_assert_napi_threaded_enabled(nl, napi0_id)
_assert_napi_threaded_disabled(nl, napi1_id)
@@ -80,7 +82,7 @@ def change_num_queues(cfg, nl) -> None:
napi0_id = napis[0]['id']
napi1_id = napis[1]['id']
- _setup_deferred_cleanup(cfg)
+ qcnt = _setup_deferred_cleanup(cfg)
# set threaded
_set_threaded_state(cfg, 1)
@@ -90,7 +92,7 @@ def change_num_queues(cfg, nl) -> None:
_assert_napi_threaded_enabled(nl, napi1_id)
cmd(f"ethtool -L {cfg.ifname} combined 1")
- cmd(f"ethtool -L {cfg.ifname} combined 2")
+ cmd(f"ethtool -L {cfg.ifname} combined {qcnt}")
# check napi threaded is set for both napis
_assert_napi_threaded_enabled(nl, napi0_id)
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index fcbdb1297e24..64ac0dfa46b7 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
dnotify_test
devpts_pts
+fclog
file_stressor
anon_inode_test
kernfs_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 73d4650af1a5..85427d7f19b9 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test
+TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test fclog
TEST_GEN_PROGS_EXTENDED := dnotify_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c
new file mode 100644
index 000000000000..912a8b755c3b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fclog.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2025 SUSE LLC.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mount.h>
+
+#include "../kselftest_harness.h"
+
+#define ASSERT_ERRNO(expected, _t, seen) \
+ __EXPECT(expected, #expected, \
+ ({__typeof__(seen) _tmp_seen = (seen); \
+ _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1)
+
+#define ASSERT_ERRNO_EQ(expected, seen) \
+ ASSERT_ERRNO(expected, ==, seen)
+
+#define ASSERT_SUCCESS(seen) \
+ ASSERT_ERRNO(0, <=, seen)
+
+FIXTURE(ns)
+{
+ int host_mntns;
+};
+
+FIXTURE_SETUP(ns)
+{
+ /* Stash the old mntns. */
+ self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->host_mntns);
+
+ /* Create a new mount namespace and make it private. */
+ ASSERT_SUCCESS(unshare(CLONE_NEWNS));
+ ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
+}
+
+FIXTURE_TEARDOWN(ns)
+{
+ ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS));
+ ASSERT_SUCCESS(close(self->host_mntns));
+}
+
+TEST_F(ns, fscontext_log_enodata)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ /* A brand new fscontext has no log entries. */
+ char buf[128] = {};
+ for (int i = 0; i < 16; i++)
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_errorfc)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ char buf[128] = {};
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_errorfc_after_fsmount)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+ int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NOSUID);
+ ASSERT_SUCCESS(mfd);
+ ASSERT_SUCCESS(move_mount(mfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH));
+
+ /*
+ * The fscontext log should still contain data even after
+ * FSCONFIG_CMD_CREATE and fsmount().
+ */
+ char buf[128] = {};
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_F(ns, fscontext_log_emsgsize)
+{
+ int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0));
+
+ char buf[128] = {};
+ /*
+ * Attempting to read a message with too small a buffer should not
+ * result in the message getting consumed.
+ */
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 0));
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 1));
+ for (int i = 0; i < 16; i++)
+ ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 16));
+
+ ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf)));
+ EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf);
+
+ /* The message has been consumed. */
+ ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf)));
+ ASSERT_SUCCESS(close(fsfd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index 63ce708d93ed..e4b7c2b457ee 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -2,6 +2,13 @@
// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@@ -10,20 +17,12 @@
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
+#include <sys/fanotify.h>
#include "../../kselftest_harness.h"
#include "../statmount/statmount.h"
#include "../utils.h"
-// Needed for linux/fanotify.h
-#ifndef __kernel_fsid_t
-typedef struct {
- int val[2];
-} __kernel_fsid_t;
-#endif
-
-#include <sys/fanotify.h>
-
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
static const int mark_cmds[] = {
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
index 090a5ca65004..9f57ca46e3af 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c
@@ -2,6 +2,13 @@
// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
#define _GNU_SOURCE
+
+// Needed for linux/fanotify.h
+typedef struct {
+ int val[2];
+} __kernel_fsid_t;
+#define __kernel_fsid_t __kernel_fsid_t
+
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
@@ -10,21 +17,12 @@
#include <sys/mount.h>
#include <unistd.h>
#include <sys/syscall.h>
+#include <sys/fanotify.h>
#include "../../kselftest_harness.h"
-#include "../../pidfd/pidfd.h"
#include "../statmount/statmount.h"
#include "../utils.h"
-// Needed for linux/fanotify.h
-#ifndef __kernel_fsid_t
-typedef struct {
- int val[2];
-} __kernel_fsid_t;
-#endif
-
-#include <sys/fanotify.h>
-
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
static const int mark_types[] = {
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 8cfb87f7f7c5..490ace1f017e 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,12 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
+PKG_CONFIG ?= pkg-config
+LIBNUMA_TEST = $(shell sh -c "$(PKG_CONFIG) numa --atleast-version 2.0.16 > /dev/null 2>&1 && echo SUFFICIENT || echo NO")
+
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
-CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES)
+CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 $(INCLUDES) $(KHDR_INCLUDES) -DLIBNUMA_VER_$(LIBNUMA_TEST)=1
LDLIBS := -lpthread -lrt -lnuma
LOCAL_HDRS := \
../include/futextest.h \
- ../include/atomic.h \
- ../include/logging.h
+ ../include/atomic.h
TEST_GEN_PROGS := \
futex_wait_timeout \
futex_wait_wouldblock \
diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c
index f29e4d627e79..e0a33510ccb6 100644
--- a/tools/testing/selftests/futex/functional/futex_numa.c
+++ b/tools/testing/selftests/futex/functional/futex_numa.c
@@ -5,9 +5,10 @@
#include <sys/mman.h>
#include <fcntl.h>
#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <time.h>
#include <assert.h>
-#include "logging.h"
#include "futextest.h"
#include "futex2test.h"
diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
index a9ecfb2d3932..d037a3f10ee8 100644
--- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c
+++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c
@@ -16,9 +16,9 @@
#include <linux/futex.h>
#include <sys/mman.h>
-#include "logging.h"
#include "futextest.h"
#include "futex2test.h"
+#include "../../kselftest_harness.h"
#define MAX_THREADS 64
@@ -77,7 +77,7 @@ static void join_max_threads(void)
}
}
-static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags)
+static void __test_futex(void *futex_ptr, int err_value, unsigned int futex_flags)
{
int to_wake, ret, i, need_exit = 0;
@@ -88,11 +88,17 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag
do {
ret = futex2_wake(futex_ptr, to_wake, futex_flags);
- if (must_fail) {
- if (ret < 0)
- break;
- ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
- to_wake, futex_flags);
+
+ if (err_value) {
+ if (ret >= 0)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
+ to_wake, futex_flags);
+
+ if (errno != err_value)
+ ksft_exit_fail_msg("futex2_wake(%d, 0x%x) expected error was %d, but returned %d (%s)\n",
+ to_wake, futex_flags, err_value, errno, strerror(errno));
+
+ break;
}
if (ret < 0) {
ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n",
@@ -106,12 +112,12 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag
join_max_threads();
for (i = 0; i < MAX_THREADS; i++) {
- if (must_fail && thread_args[i].result != -1) {
+ if (err_value && thread_args[i].result != -1) {
ksft_print_msg("Thread %d should fail but succeeded (%d)\n",
i, thread_args[i].result);
need_exit = 1;
}
- if (!must_fail && thread_args[i].result != 0) {
+ if (!err_value && thread_args[i].result != 0) {
ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result);
need_exit = 1;
}
@@ -120,58 +126,30 @@ static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flag
ksft_exit_fail_msg("Aborting due to earlier errors.\n");
}
-static void test_futex(void *futex_ptr, int must_fail)
+static void test_futex(void *futex_ptr, int err_value)
{
- __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
}
-static void test_futex_mpol(void *futex_ptr, int must_fail)
+static void test_futex_mpol(void *futex_ptr, int err_value)
{
- __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
+ __test_futex(futex_ptr, err_value, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
}
-static void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
-int main(int argc, char *argv[])
+TEST(futex_numa_mpol)
{
struct futex32_numa *futex_numa;
- int mem_size, i;
void *futex_ptr;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
+ int mem_size;
mem_size = sysconf(_SC_PAGE_SIZE);
- futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ futex_ptr = mmap(NULL, mem_size * 2, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
if (futex_ptr == MAP_FAILED)
ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size);
+ /* Create an invalid memory region for the "Memory out of range" test */
+ mprotect(futex_ptr + mem_size, mem_size, PROT_NONE);
+
futex_numa = futex_ptr;
ksft_print_msg("Regular test\n");
@@ -182,27 +160,31 @@ int main(int argc, char *argv[])
if (futex_numa->numa == FUTEX_NO_NODE)
ksft_exit_fail_msg("NUMA node is left uninitialized\n");
- ksft_print_msg("Memory too small\n");
- test_futex(futex_ptr + mem_size - 4, 1);
+ /* FUTEX2_NUMA futex must be 8-byte aligned */
+ ksft_print_msg("Mis-aligned futex\n");
+ test_futex(futex_ptr + mem_size - 4, EINVAL);
ksft_print_msg("Memory out of range\n");
- test_futex(futex_ptr + mem_size, 1);
+ test_futex(futex_ptr + mem_size, EFAULT);
futex_numa->numa = FUTEX_NO_NODE;
mprotect(futex_ptr, mem_size, PROT_READ);
ksft_print_msg("Memory, RO\n");
- test_futex(futex_ptr, 1);
+ test_futex(futex_ptr, EFAULT);
mprotect(futex_ptr, mem_size, PROT_NONE);
ksft_print_msg("Memory, no access\n");
- test_futex(futex_ptr, 1);
+ test_futex(futex_ptr, EFAULT);
mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE);
ksft_print_msg("Memory back to RW\n");
test_futex(futex_ptr, 0);
+ ksft_test_result_pass("futex2 memory boundary tests passed\n");
+
/* MPOL test. Does not work as expected */
- for (i = 0; i < 4; i++) {
+#ifdef LIBNUMA_VER_SUFFICIENT
+ for (int i = 0; i < 4; i++) {
unsigned long nodemask;
int ret;
@@ -221,15 +203,17 @@ int main(int argc, char *argv[])
ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
if (ret < 0)
ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n");
- if (0)
- test_futex_mpol(futex_numa, 0);
if (futex_numa->numa != i) {
ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n",
futex_numa->numa, i);
}
}
}
- ksft_test_result_pass("NUMA MPOL tests passed\n");
- ksft_finished();
- return 0;
+ ksft_test_result_pass("futex2 MPOL hints test passed\n");
+#else
+ ksft_test_result_skip("futex2 MPOL hints test requires libnuma 2.0.16+\n");
+#endif
+ munmap(futex_ptr, mem_size * 2);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c
index aea001ac4946..3b7b5851f290 100644
--- a/tools/testing/selftests/futex/functional/futex_priv_hash.c
+++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c
@@ -14,7 +14,7 @@
#include <linux/prctl.h>
#include <sys/prctl.h>
-#include "logging.h"
+#include "../../kselftest_harness.h"
#define MAX_THREADS 64
@@ -128,46 +128,14 @@ static void futex_dummy_op(void)
ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret);
}
-static void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -g Test global hash instead intead local immutable \n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n";
static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n";
-int main(int argc, char *argv[])
+TEST(priv_hash)
{
int futex_slots1, futex_slotsn, online_cpus;
pthread_mutexattr_t mutex_attr_pi;
int ret, retry = 20;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(21);
ret = pthread_mutexattr_init(&mutex_attr_pi);
ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
@@ -189,14 +157,14 @@ int main(int argc, char *argv[])
if (ret != 0)
ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret);
- /* First thread, has to initialiaze private hash */
+ /* First thread, has to initialize private hash */
futex_slots1 = futex_hash_slots_get();
if (futex_slots1 <= 0) {
ksft_print_msg("Current hash buckets: %d\n", futex_slots1);
- ksft_exit_fail_msg(test_msg_auto_create);
+ ksft_exit_fail_msg("%s", test_msg_auto_create);
}
- ksft_test_result_pass(test_msg_auto_create);
+ ksft_test_result_pass("%s", test_msg_auto_create);
online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
@@ -237,11 +205,11 @@ retry_getslots:
}
ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
futex_slots1, futex_slotsn);
- ksft_exit_fail_msg(test_msg_auto_inc);
+ ksft_exit_fail_msg("%s", test_msg_auto_inc);
}
- ksft_test_result_pass(test_msg_auto_inc);
+ ksft_test_result_pass("%s", test_msg_auto_inc);
} else {
- ksft_test_result_skip(test_msg_auto_inc);
+ ksft_test_result_skip("%s", test_msg_auto_inc);
}
ret = pthread_mutex_unlock(&global_lock);
@@ -257,17 +225,17 @@ retry_getslots:
futex_hash_slots_set_verify(2);
join_max_threads();
- ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n",
+ ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n",
counter, MAX_THREADS);
counter = 0;
- /* Once the user set something, auto reisze must be disabled */
+ /* Once the user set something, auto resize must be disabled */
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
create_max_threads(thread_lock_fn);
join_max_threads();
ret = futex_hash_slots_get();
- ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n",
+ ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n",
ret);
futex_hash_slots_set_must_fail(1 << 29);
@@ -280,7 +248,7 @@ retry_getslots:
ret = futex_hash_slots_set(0);
ksft_test_result(ret == 0, "Global hash request\n");
if (ret != 0)
- goto out;
+ return;
futex_hash_slots_set_must_fail(4);
futex_hash_slots_set_must_fail(8);
@@ -289,17 +257,14 @@ retry_getslots:
futex_hash_slots_set_must_fail(6);
ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
- if (ret != 0) {
+ if (ret != 0)
ksft_exit_fail_msg("pthread_barrier_init failed: %m\n");
- return 1;
- }
+
create_max_threads(thread_lock_fn);
join_max_threads();
ret = futex_hash_slots_get();
ksft_test_result(ret == 0, "Continue to use global hash\n");
-
-out:
- ksft_finished();
- return 0;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
index 51485be6eb2f..69e2555b6039 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -7,24 +7,15 @@
#include <pthread.h>
#include <limits.h>
-#include "logging.h"
+
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-requeue"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
volatile futex_t *f1;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *waiterfn(void *arg)
{
struct timespec to;
@@ -38,67 +29,49 @@ void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(requeue_single)
{
- pthread_t waiter[10];
- int res, ret = RET_PASS;
- int c, i;
volatile futex_t _f1 = 0;
volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res;
f1 = &_f1;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(2);
- ksft_print_msg("%s: Test futex_requeue\n",
- basename(argv[0]));
-
/*
* Requeue a waiter from f1 to f2, and wake f2.
*/
if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Requeuing 1 futex from f1 to f2\n");
+ ksft_print_dbg_msg("Requeuing 1 futex from f1 to f2\n");
res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
- if (res != 1) {
+ if (res != 1)
ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
- }
-
- info("Waking 1 futex at f2\n");
+ ksft_print_dbg_msg("Waking 1 futex at f2\n");
res = futex_wake(&f2, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_requeue simple succeeds\n");
}
+}
+
+TEST(requeue_multiple)
+{
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+ pthread_t waiter[10];
+ int res, i;
+ f1 = &_f1;
/*
* Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
@@ -106,31 +79,28 @@ int main(int argc, char *argv[])
*/
for (i = 0; i < 10; i++) {
if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
}
usleep(WAKE_WAIT_US);
- info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+ ksft_print_dbg_msg("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
if (res != 10) {
ksft_test_result_fail("futex_requeue many returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
}
- info("Waking INT_MAX futexes at f2\n");
+ ksft_print_dbg_msg("Waking INT_MAX futexes at f2\n");
res = futex_wake(&f2, INT_MAX, 0);
if (res != 7) {
ksft_test_result_fail("futex_requeue many returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_requeue many succeeds\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 215c6cb539b4..f299d75848cd 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -26,11 +26,11 @@
#include <stdlib.h>
#include <signal.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi"
#define MAX_WAKE_ITERS 1000
#define THREAD_MAX 10
#define SIGNAL_PERIOD_US 100
@@ -42,12 +42,6 @@ futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
futex_t wake_complete = FUTEX_INITIALIZER;
-/* Test option defaults */
-static long timeout_ns;
-static int broadcast;
-static int owner;
-static int locked;
-
struct thread_arg {
long id;
struct timespec *timeout;
@@ -56,18 +50,73 @@ struct thread_arg {
};
#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
-void usage(char *prog)
+FIXTURE(args)
{
- printf("Usage: %s\n", prog);
- printf(" -b Broadcast wakeup (all waiters)\n");
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -l Lock the pi futex across requeue\n");
- printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
- printf(" -t N Timeout in nanoseconds (default: 0)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
+};
+
+FIXTURE_SETUP(args)
+{
+};
+
+FIXTURE_TEARDOWN(args)
+{
+};
+
+FIXTURE_VARIANT(args)
+{
+ long timeout_ns;
+ bool broadcast;
+ bool owner;
+ bool locked;
+};
+
+/*
+ * For a given timeout value, this macro creates a test input with all the
+ * possible combinations of valid arguments
+ */
+#define FIXTURE_VARIANT_ADD_TIMEOUT(timeout) \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout) \
+{ \
+ .timeout_ns = timeout, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_broadcast_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .broadcast = true, \
+ .owner = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_locked) \
+{ \
+ .timeout_ns = timeout, \
+ .locked = true, \
+}; \
+ \
+FIXTURE_VARIANT_ADD(args, t_##timeout##_owner) \
+{ \
+ .timeout_ns = timeout, \
+ .owner = true, \
+}; \
+
+FIXTURE_VARIANT_ADD_TIMEOUT(0);
+FIXTURE_VARIANT_ADD_TIMEOUT(5000);
+FIXTURE_VARIANT_ADD_TIMEOUT(500000);
+FIXTURE_VARIANT_ADD_TIMEOUT(2000000000);
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
@@ -81,26 +130,26 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
return -1;
}
ret = pthread_attr_setschedpolicy(&attr, policy);
if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
return -1;
}
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
if (ret) {
- error("pthread_attr_setschedparam\n", ret);
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
return -1;
}
ret = pthread_create(pth, &attr, func, arg);
if (ret) {
- error("pthread_create\n", ret);
+ ksft_exit_fail_msg("pthread_create\n");
return -1;
}
return 0;
@@ -112,7 +161,7 @@ void *waiterfn(void *arg)
struct thread_arg *args = (struct thread_arg *)arg;
futex_t old_val;
- info("Waiter %ld: running\n", args->id);
+ ksft_print_dbg_msg("Waiter %ld: running\n", args->id);
/* Each thread sleeps for a different amount of time
* This is to avoid races, because we don't lock the
* external mutex here */
@@ -120,26 +169,25 @@ void *waiterfn(void *arg)
old_val = f1;
atomic_inc(&waiters_blocked);
- info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ ksft_print_dbg_msg("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
&f1, f1, &f2);
args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
FUTEX_PRIVATE_FLAG);
- info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ ksft_print_dbg_msg("waiter %ld woke with %d %s\n", args->id, args->ret,
args->ret < 0 ? strerror(errno) : "");
atomic_inc(&waiters_woken);
if (args->ret < 0) {
if (args->timeout && errno == ETIMEDOUT)
args->ret = 0;
else {
- args->ret = RET_ERROR;
- error("futex_wait_requeue_pi\n", errno);
+ ksft_exit_fail_msg("futex_wait_requeue_pi\n");
}
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ ksft_print_dbg_msg("Waiter %ld: exiting with %d\n", args->id, args->ret);
pthread_exit((void *)&args->ret);
}
@@ -152,14 +200,14 @@ void *broadcast_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
- info("Waker: Calling broadcast\n");
+ ksft_print_dbg_msg("Waker: Calling broadcast\n");
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
continue_requeue:
@@ -167,16 +215,14 @@ void *broadcast_wakerfn(void *arg)
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
FUTEX_PRIVATE_FLAG);
if (args->ret < 0) {
- args->ret = RET_ERROR;
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
} else if (++i < MAX_WAKE_ITERS) {
task_count += args->ret;
if (task_count < THREAD_MAX - waiters_woken.val)
goto continue_requeue;
} else {
- error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
- 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
- args->ret = RET_ERROR;
+ ksft_exit_fail_msg("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ MAX_WAKE_ITERS, task_count, THREAD_MAX);
}
futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
@@ -187,7 +233,7 @@ void *broadcast_wakerfn(void *arg)
if (args->ret > 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
pthread_exit((void *)&args->ret);
}
@@ -200,20 +246,20 @@ void *signal_wakerfn(void *arg)
int nr_wake = 1;
int i = 0;
- info("Waker: waiting for waiters to block\n");
+ ksft_print_dbg_msg("Waker: waiting for waiters to block\n");
while (waiters_blocked.val < THREAD_MAX)
usleep(1000);
usleep(1000);
while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
- info("task_count: %d, waiters_woken: %d\n",
+ ksft_print_dbg_msg("task_count: %d, waiters_woken: %d\n",
task_count, waiters_woken.val);
if (args->lock) {
- info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
}
- info("Waker: Calling signal\n");
+ ksft_print_dbg_msg("Waker: Calling signal\n");
/* cond_signal */
old_val = f1;
args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
@@ -221,28 +267,23 @@ void *signal_wakerfn(void *arg)
FUTEX_PRIVATE_FLAG);
if (args->ret < 0)
args->ret = -errno;
- info("futex: %x\n", f2);
+ ksft_print_dbg_msg("futex: %x\n", f2);
if (args->lock) {
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
- f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
}
- info("futex: %x\n", f2);
- if (args->ret < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- args->ret = RET_ERROR;
- break;
- }
+ ksft_print_dbg_msg("futex: %x\n", f2);
+ if (args->ret < 0)
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
task_count += args->ret;
usleep(SIGNAL_PERIOD_US);
i++;
/* we have to loop at least THREAD_MAX times */
if (i > MAX_WAKE_ITERS + THREAD_MAX) {
- error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
- 0, MAX_WAKE_ITERS + THREAD_MAX);
- args->ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ MAX_WAKE_ITERS + THREAD_MAX);
}
}
@@ -251,8 +292,8 @@ void *signal_wakerfn(void *arg)
if (args->ret >= 0)
args->ret = task_count;
- info("Waker: exiting with %d\n", args->ret);
- info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ ksft_print_dbg_msg("Waker: exiting with %d\n", args->ret);
+ ksft_print_dbg_msg("Waker: waiters_woken: %d\n", waiters_woken.val);
pthread_exit((void *)&args->ret);
}
@@ -269,35 +310,40 @@ void *third_party_blocker(void *arg)
ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
out:
- if (args->ret || ret2) {
- error("third_party_blocker() futex error", 0);
- args->ret = RET_ERROR;
- }
+ if (args->ret || ret2)
+ ksft_exit_fail_msg("third_party_blocker() futex error");
pthread_exit((void *)&args->ret);
}
-int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+TEST_F(args, futex_requeue_pi)
{
- void *(*wakerfn)(void *) = signal_wakerfn;
struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
pthread_t waiter[THREAD_MAX], waker, blocker;
- struct timespec ts, *tsp = NULL;
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ bool third_party_owner = variant->owner;
+ long timeout_ns = variant->timeout_ns;
+ bool broadcast = variant->broadcast;
struct thread_arg args[THREAD_MAX];
- int *waiter_ret;
- int i, ret = RET_PASS;
+ struct timespec ts, *tsp = NULL;
+ bool lock = variant->locked;
+ int *waiter_ret, i, ret = 0;
+
+ ksft_print_msg(
+ "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, lock, third_party_owner, timeout_ns);
if (timeout_ns) {
time_t secs;
- info("timeout_ns = %ld\n", timeout_ns);
+ ksft_print_dbg_msg("timeout_ns = %ld\n", timeout_ns);
ret = clock_gettime(CLOCK_MONOTONIC, &ts);
secs = (ts.tv_nsec + timeout_ns) / 1000000000;
ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
ts.tv_sec += secs;
- info("ts.tv_sec = %ld\n", ts.tv_sec);
- info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ ksft_print_dbg_msg("ts.tv_sec = %ld\n", ts.tv_sec);
+ ksft_print_dbg_msg("ts.tv_nsec = %ld\n", ts.tv_nsec);
tsp = &ts;
}
@@ -307,10 +353,7 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
if (third_party_owner) {
if (create_rt_thread(&blocker, third_party_blocker,
(void *)&blocker_arg, SCHED_FIFO, 1)) {
- error("Creating third party blocker thread failed\n",
- errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating third party blocker thread failed\n");
}
}
@@ -318,20 +361,16 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
for (i = 0; i < THREAD_MAX; i++) {
args[i].id = i;
args[i].timeout = tsp;
- info("Starting thread %d\n", i);
+ ksft_print_dbg_msg("Starting thread %d\n", i);
if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
SCHED_FIFO, 1)) {
- error("Creating waiting thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waiting thread failed\n");
}
}
waker_arg.lock = lock;
if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
SCHED_FIFO, 1)) {
- error("Creating waker thread failed\n", errno);
- ret = RET_ERROR;
- goto out;
+ ksft_exit_fail_msg("Creating waker thread failed\n");
}
/* Wait for threads to finish */
@@ -345,7 +384,6 @@ int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
pthread_join(blocker, NULL);
pthread_join(waker, NULL);
-out:
if (!ret) {
if (*waiter_ret)
ret = *waiter_ret;
@@ -355,66 +393,8 @@ out:
ret = blocker_arg.ret;
}
- return ret;
+ if (ret)
+ ksft_test_result_fail("fail");
}
-int main(int argc, char *argv[])
-{
- char *test_name;
- int c, ret;
-
- while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
- switch (c) {
- case 'b':
- broadcast = 1;
- break;
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'l':
- locked = 1;
- break;
- case 'o':
- owner = 1;
- locked = 0;
- break;
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
- ksft_print_msg(
- "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
- broadcast, locked, owner, timeout_ns);
-
- ret = asprintf(&test_name,
- "%s broadcast=%d locked=%d owner=%d timeout=%ldns",
- TEST_NAME, broadcast, locked, owner, timeout_ns);
- if (ret < 0) {
- ksft_print_msg("Failed to generate test name\n");
- test_name = TEST_NAME;
- }
-
- /*
- * FIXME: unit_test is obsolete now that we parse options and the
- * various style of runs are done by run.sh - simplify the code and move
- * unit_test into main()
- */
- ret = unit_test(broadcast, locked, owner, timeout_ns);
-
- print_result(test_name, ret);
- return ret;
-}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
index d0a4d332ea44..77135a22a583 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -23,67 +23,32 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
-#include "futextest.h"
-#include "logging.h"
-#define TEST_NAME "futex-requeue-pi-mismatched-ops"
+#include "futextest.h"
+#include "../../kselftest_harness.h"
futex_t f1 = FUTEX_INITIALIZER;
futex_t f2 = FUTEX_INITIALIZER;
int child_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *blocking_child(void *arg)
{
child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
if (child_ret < 0) {
child_ret = -errno;
- error("futex_wait\n", errno);
+ ksft_exit_fail_msg("futex_wait\n");
}
return (void *)&child_ret;
}
-int main(int argc, char *argv[])
+TEST(requeue_pi_mismatched_ops)
{
- int ret = RET_PASS;
pthread_t child;
- int c;
+ int ret;
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
- basename(argv[0]));
+ if (pthread_create(&child, NULL, blocking_child, NULL))
+ ksft_exit_fail_msg("pthread_create\n");
- if (pthread_create(&child, NULL, blocking_child, NULL)) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
/* Allow the child to block in the kernel. */
sleep(1);
@@ -102,34 +67,27 @@ int main(int argc, char *argv[])
* FUTEX_WAKE.
*/
ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
- if (ret == 1) {
- ret = RET_PASS;
- } else if (ret < 0) {
- error("futex_wake\n", errno);
- ret = RET_ERROR;
- } else {
- error("futex_wake did not wake the child\n", 0);
- ret = RET_ERROR;
- }
+ if (ret == 1)
+ ret = 0;
+ else if (ret < 0)
+ ksft_exit_fail_msg("futex_wake\n");
+ else
+ ksft_exit_fail_msg("futex_wake did not wake the child\n");
} else {
- error("futex_cmp_requeue_pi\n", errno);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi\n");
}
} else if (ret > 0) {
- fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
- ret = RET_FAIL;
+ ksft_test_result_fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
} else {
- error("futex_cmp_requeue_pi found no waiters\n", 0);
- ret = RET_ERROR;
+ ksft_exit_fail_msg("futex_cmp_requeue_pi found no waiters\n");
}
pthread_join(child, NULL);
- if (!ret)
- ret = child_ret;
-
- out:
- /* If the kernel crashes, we shouldn't return at all. */
- print_result(TEST_NAME, ret);
- return ret;
+ if (!ret && !child_ret)
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops passed\n");
+ else
+ ksft_test_result_pass("futex_requeue_pi_mismatched_ops failed\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index c6b8f32990c8..e34ee0f9ebcc 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -24,11 +24,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
#include "atomic.h"
#include "futextest.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-requeue-pi-signal-restart"
#define DELAY_US 100
futex_t f1 = FUTEX_INITIALIZER;
@@ -37,15 +37,6 @@ atomic_t requeued = ATOMIC_INITIALIZER;
int waiter_ret = 0;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
int policy, int prio)
{
@@ -57,35 +48,28 @@ int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
memset(&schedp, 0, sizeof(schedp));
ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
- if (ret) {
- error("pthread_attr_setinheritsched\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setinheritsched\n");
ret = pthread_attr_setschedpolicy(&attr, policy);
- if (ret) {
- error("pthread_attr_setschedpolicy\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedpolicy\n");
schedp.sched_priority = prio;
ret = pthread_attr_setschedparam(&attr, &schedp);
- if (ret) {
- error("pthread_attr_setschedparam\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_attr_setschedparam\n");
ret = pthread_create(pth, &attr, func, arg);
- if (ret) {
- error("pthread_create\n", ret);
- return -1;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
+
return 0;
}
void handle_signal(int signo)
{
- info("signal received %s requeue\n",
+ ksft_print_dbg_msg("signal received %s requeue\n",
requeued.val ? "after" : "prior to");
}
@@ -94,78 +78,46 @@ void *waiterfn(void *arg)
unsigned int old_val;
int res;
- waiter_ret = RET_PASS;
-
- info("Waiter running\n");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Waiter running\n");
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
old_val = f1;
res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
FUTEX_PRIVATE_FLAG);
if (!requeued.val || errno != EWOULDBLOCK) {
- fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ ksft_test_result_fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
res, strerror(errno));
- info("w2:futex: %x\n", f2);
+ ksft_print_dbg_msg("w2:futex: %x\n", f2);
if (!res)
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- waiter_ret = RET_FAIL;
}
- info("Waiter exiting with %d\n", waiter_ret);
pthread_exit(NULL);
}
-int main(int argc, char *argv[])
+TEST(futex_requeue_pi_signal_restart)
{
unsigned int old_val;
struct sigaction sa;
pthread_t waiter;
- int c, res, ret = RET_PASS;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test signal handling during requeue_pi\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: <none>\n");
+ int res;
sa.sa_handler = handle_signal;
sigemptyset(&sa.sa_mask);
sa.sa_flags = 0;
- if (sigaction(SIGUSR1, &sa, NULL)) {
- error("sigaction\n", errno);
- exit(1);
- }
+ if (sigaction(SIGUSR1, &sa, NULL))
+ ksft_exit_fail_msg("sigaction\n");
- info("m1:f2: %x\n", f2);
- info("Creating waiter\n");
+ ksft_print_dbg_msg("m1:f2: %x\n", f2);
+ ksft_print_dbg_msg("Creating waiter\n");
res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
- if (res) {
- error("Creating waiting thread failed", res);
- ret = RET_ERROR;
- goto out;
- }
+ if (res)
+ ksft_exit_fail_msg("Creating waiting thread failed");
- info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
- info("m2:f2: %x\n", f2);
+ ksft_print_dbg_msg("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("m2:f2: %x\n", f2);
futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
- info("m3:f2: %x\n", f2);
+ ksft_print_dbg_msg("m3:f2: %x\n", f2);
while (1) {
/*
@@ -173,11 +125,11 @@ int main(int argc, char *argv[])
* restart futex_wait_requeue_pi() in the kernel. Wait for the
* waiter to block on f1 again.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
usleep(DELAY_US);
- info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ ksft_print_dbg_msg("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
old_val = f1;
res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
FUTEX_PRIVATE_FLAG);
@@ -191,12 +143,10 @@ int main(int argc, char *argv[])
atomic_set(&requeued, 1);
break;
} else if (res < 0) {
- error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
- ret = RET_ERROR;
- break;
+ ksft_exit_fail_msg("FUTEX_CMP_REQUEUE_PI failed\n");
}
}
- info("m4:f2: %x\n", f2);
+ ksft_print_dbg_msg("m4:f2: %x\n", f2);
/*
* Signal the waiter after requeue, waiter should return from
@@ -204,19 +154,14 @@ int main(int argc, char *argv[])
* futex_unlock_pi() can't happen before the signal wakeup is detected
* in the kernel.
*/
- info("Issuing SIGUSR1 to waiter\n");
+ ksft_print_dbg_msg("Issuing SIGUSR1 to waiter\n");
pthread_kill(waiter, SIGUSR1);
- info("Waiting for waiter to return\n");
+ ksft_print_dbg_msg("Waiting for waiter to return\n");
pthread_join(waiter, NULL);
- info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ ksft_print_dbg_msg("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
- info("m5:f2: %x\n", f2);
-
- out:
- if (ret == RET_PASS && waiter_ret)
- ret = waiter_ret;
-
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_print_dbg_msg("m5:f2: %x\n", f2);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
index 685140d9b93d..152ca4612886 100644
--- a/tools/testing/selftests/futex/functional/futex_wait.c
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -9,25 +9,16 @@
#include <sys/shm.h>
#include <sys/mman.h>
#include <fcntl.h>
-#include "logging.h"
+
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait"
#define timeout_ns 30000000
#define WAKE_WAIT_US 10000
#define SHM_PATH "futex_shm_file"
void *futex;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
static void *waiterfn(void *arg)
{
struct timespec to;
@@ -45,53 +36,37 @@ static void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(private_futex)
{
- int res, ret = RET_PASS, fd, c, shm_id;
- u_int32_t f_private = 0, *shared_data;
unsigned int flags = FUTEX_PRIVATE_FLAG;
+ u_int32_t f_private = 0;
pthread_t waiter;
- void *shm;
+ int res;
futex = &f_private;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(3);
- ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
-
/* Testing a private futex */
- info("Calling private futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling private futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling private futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling private futex_wake on futex: %p\n", futex);
res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
if (res != 1) {
ksft_test_result_fail("futex_wake private returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake private succeeds\n");
}
+}
+
+TEST(anon_page)
+{
+ u_int32_t *shared_data;
+ pthread_t waiter;
+ int res, shm_id;
/* Testing an anon page shared memory */
shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
@@ -105,67 +80,65 @@ int main(int argc, char *argv[])
*shared_data = 0;
futex = shared_data;
- info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (page anon) futex_wake on futex: %p\n", futex);
res = futex_wake(futex, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
}
+ shmdt(shared_data);
+}
+
+TEST(file_backed)
+{
+ u_int32_t f_private = 0;
+ pthread_t waiter;
+ int res, fd;
+ void *shm;
/* Testing a file backed shared memory */
fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
- if (fd < 0) {
- perror("open");
- exit(1);
- }
+ if (fd < 0)
+ ksft_exit_fail_msg("open");
- if (ftruncate(fd, sizeof(f_private))) {
- perror("ftruncate");
- exit(1);
- }
+ if (ftruncate(fd, sizeof(f_private)))
+ ksft_exit_fail_msg("ftruncate");
shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (shm == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (shm == MAP_FAILED)
+ ksft_exit_fail_msg("mmap");
memcpy(shm, &f_private, sizeof(f_private));
futex = shm;
- info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wait on futex: %p\n", futex);
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
- info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+ ksft_print_dbg_msg("Calling shared (file backed) futex_wake on futex: %p\n", futex);
res = futex_wake(shm, 1, 0);
if (res != 1) {
ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
errno, strerror(errno));
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
}
- /* Freeing resources */
- shmdt(shared_data);
munmap(shm, sizeof(f_private));
remove(SHM_PATH);
close(fd);
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
index fb4148f23fa3..8952ebda14ab 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -27,10 +27,9 @@
#include <libgen.h>
#include <signal.h>
-#include "logging.h"
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait-private-mapped-file"
#define PAGE_SZ 4096
char pad[PAGE_SZ] = {1};
@@ -40,86 +39,44 @@ char pad2[PAGE_SZ] = {1};
#define WAKE_WAIT_US 3000000
struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *thr_futex_wait(void *arg)
{
int ret;
- info("futex wait\n");
+ ksft_print_dbg_msg("futex wait\n");
ret = futex_wait(&val, 1, &wait_timeout, 0);
- if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
- error("futex error.\n", errno);
- print_result(TEST_NAME, RET_ERROR);
- exit(RET_ERROR);
- }
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT)
+ ksft_exit_fail_msg("futex error.\n");
if (ret && errno == ETIMEDOUT)
- fail("waiter timedout\n");
+ ksft_exit_fail_msg("waiter timedout\n");
- info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+ ksft_print_dbg_msg("futex_wait: ret = %d, errno = %d\n", ret, errno);
return NULL;
}
-int main(int argc, char **argv)
+TEST(wait_private_mapped_file)
{
pthread_t thr;
- int ret = RET_PASS;
int res;
- int c;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg(
- "%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
- basename(argv[0]));
-
- ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
- if (ret < 0) {
- fprintf(stderr, "pthread_create error\n");
- ret = RET_ERROR;
- goto out;
- }
-
- info("wait a while\n");
+
+ res = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+ if (res < 0)
+ ksft_exit_fail_msg("pthread_create error\n");
+
+ ksft_print_dbg_msg("wait a while\n");
usleep(WAKE_WAIT_US);
val = 2;
res = futex_wake(&val, 1, 0);
- info("futex_wake %d\n", res);
- if (res != 1) {
- fail("FUTEX_WAKE didn't find the waiting thread.\n");
- ret = RET_FAIL;
- }
+ ksft_print_dbg_msg("futex_wake %d\n", res);
+ if (res != 1)
+ ksft_exit_fail_msg("FUTEX_WAKE didn't find the waiting thread.\n");
- info("join\n");
+ ksft_print_dbg_msg("join\n");
pthread_join(thr, NULL);
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ ksft_test_result_pass("wait_private_mapped_file");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index d183f878360b..0c8766aced2e 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -16,26 +16,15 @@
*****************************************************************************/
#include <pthread.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
-
-#define TEST_NAME "futex-wait-timeout"
+#include "../../kselftest_harness.h"
static long timeout_ns = 100000; /* 100us default timeout */
static futex_t futex_pi;
static pthread_barrier_t barrier;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
/*
* Get a PI lock and hold it forever, so the main thread lock_pi will block
* and we can test the timeout
@@ -47,13 +36,13 @@ void *get_pi_lock(void *arg)
ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
if (ret != 0)
- error("futex_lock_pi failed\n", ret);
+ ksft_exit_fail_msg("futex_lock_pi failed\n");
pthread_barrier_wait(&barrier);
/* Blocks forever */
ret = futex_wait(&lock, 0, NULL, 0);
- error("futex_wait failed\n", ret);
+ ksft_exit_fail_msg("futex_wait failed\n");
return NULL;
}
@@ -61,12 +50,11 @@ void *get_pi_lock(void *arg)
/*
* Check if the function returned the expected error
*/
-static void test_timeout(int res, int *ret, char *test_name, int err)
+static void test_timeout(int res, char *test_name, int err)
{
if (!res || errno != err) {
ksft_test_result_fail("%s returned %d\n", test_name,
res < 0 ? errno : res);
- *ret = RET_FAIL;
} else {
ksft_test_result_pass("%s succeeds\n", test_name);
}
@@ -78,10 +66,8 @@ static void test_timeout(int res, int *ret, char *test_name, int err)
static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
long timeout_ns)
{
- if (clock_gettime(clockid, to)) {
- error("clock_gettime failed\n", errno);
- return errno;
- }
+ if (clock_gettime(clockid, to))
+ ksft_exit_fail_msg("clock_gettime failed\n");
to->tv_nsec += timeout_ns;
@@ -93,83 +79,66 @@ static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
return 0;
}
-int main(int argc, char *argv[])
+TEST(wait_bitset)
{
futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
struct timespec to;
- pthread_t thread;
- int c;
- struct futex_waitv waitv = {
- .uaddr = (uintptr_t)&f1,
- .val = f1,
- .flags = FUTEX_32,
- .__reserved = 0
- };
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 't':
- timeout_ns = atoi(optarg);
- break;
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(9);
- ksft_print_msg("%s: Block on a futex and wait for timeout\n",
- basename(argv[0]));
- ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
-
- pthread_barrier_init(&barrier, NULL, 2);
- pthread_create(&thread, NULL, get_pi_lock, NULL);
+ int res;
/* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
res = futex_wait(&f1, f1, &to, 0);
- test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+ test_timeout(res, "futex_wait relative", ETIMEDOUT);
/* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+ test_timeout(res, "futex_wait_bitset realtime", ETIMEDOUT);
/* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_bitset(&f1, f1, &to, 1, 0);
- test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_wait_bitset monotonic", ETIMEDOUT);
+}
+
+TEST(requeue_pi)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res;
/* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+ test_timeout(res, "futex_wait_requeue_pi realtime", ETIMEDOUT);
/* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
- test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+}
+
+TEST(lock_pi)
+{
+ struct timespec to;
+ pthread_t thread;
+ int res;
+
+ /* Create a thread that will lock forever so any waiter will timeout */
+ pthread_barrier_init(&barrier, NULL, 2);
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
/* Wait until the other thread calls futex_lock_pi() */
pthread_barrier_wait(&barrier);
pthread_barrier_destroy(&barrier);
+
/*
* FUTEX_LOCK_PI with CLOCK_REALTIME
* Due to historical reasons, FUTEX_LOCK_PI supports only realtime
@@ -181,26 +150,38 @@ int main(int argc, char *argv[])
* smaller than realtime and the syscall will timeout immediately.
*/
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_lock_pi(&futex_pi, &to, 0, 0);
- test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+ test_timeout(res, "futex_lock_pi realtime", ETIMEDOUT);
/* Test operations that don't support FUTEX_CLOCK_REALTIME */
res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
+ test_timeout(res, "futex_lock_pi invalid timeout flag", ENOSYS);
+}
+
+TEST(waitv)
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ struct timespec to;
+ int res;
/* futex_waitv with CLOCK_MONOTONIC */
if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
- test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT);
+ test_timeout(res, "futex_waitv monotonic", ETIMEDOUT);
/* futex_waitv with CLOCK_REALTIME */
if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
- return RET_FAIL;
+ ksft_test_result_error("get_time error");
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
- test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT);
-
- ksft_print_cnts();
- return ret;
+ test_timeout(res, "futex_waitv realtime", ETIMEDOUT);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
index ed9cd07e31c1..ce2301500d83 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -29,95 +29,55 @@
#include <linux/futex.h>
#include <libgen.h>
-#include "logging.h"
#include "futextest.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait-uninitialized-heap"
#define WAIT_US 5000000
static int child_blocked = 1;
-static int child_ret;
+static bool child_ret;
void *buf;
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *wait_thread(void *arg)
{
int res;
- child_ret = RET_PASS;
+ child_ret = true;
res = futex_wait(buf, 1, NULL, 0);
child_blocked = 0;
if (res != 0 && errno != EWOULDBLOCK) {
- error("futex failure\n", errno);
- child_ret = RET_ERROR;
+ ksft_exit_fail_msg("futex failure\n");
+ child_ret = false;
}
pthread_exit(NULL);
}
-int main(int argc, char **argv)
+TEST(futex_wait_uninitialized_heap)
{
- int c, ret = RET_PASS;
long page_size;
pthread_t thr;
-
- while ((c = getopt(argc, argv, "chv:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
+ int ret;
page_size = sysconf(_SC_PAGESIZE);
buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
- if (buf == (void *)-1) {
- error("mmap\n", errno);
- exit(1);
- }
-
- ksft_print_header();
- ksft_set_plan(1);
- ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
+ if (buf == (void *)-1)
+ ksft_exit_fail_msg("mmap\n");
ret = pthread_create(&thr, NULL, wait_thread, NULL);
- if (ret) {
- error("pthread_create\n", errno);
- ret = RET_ERROR;
- goto out;
- }
+ if (ret)
+ ksft_exit_fail_msg("pthread_create\n");
- info("waiting %dus for child to return\n", WAIT_US);
+ ksft_print_dbg_msg("waiting %dus for child to return\n", WAIT_US);
usleep(WAIT_US);
- ret = child_ret;
- if (child_blocked) {
- fail("child blocked in kernel\n");
- ret = RET_FAIL;
- }
+ if (child_blocked)
+ ksft_test_result_fail("child blocked in kernel\n");
- out:
- print_result(TEST_NAME, ret);
- return ret;
+ if (!child_ret)
+ ksft_test_result_fail("child error\n");
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 2d8230da9064..36b7a54a4085 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -21,72 +21,44 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait-wouldblock"
#define timeout_ns 100000
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
-int main(int argc, char *argv[])
+TEST(futex_wait_wouldblock)
{
struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
futex_t f1 = FUTEX_INITIALIZER;
- int res, ret = RET_PASS;
- int c;
- struct futex_waitv waitv = {
- .uaddr = (uintptr_t)&f1,
- .val = f1+1,
- .flags = FUTEX_32,
- .__reserved = 0
- };
+ int res;
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(2);
- ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
- basename(argv[0]));
-
- info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ ksft_print_dbg_msg("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
if (!res || errno != EWOULDBLOCK) {
ksft_test_result_fail("futex_wait returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_wait\n");
}
+}
- if (clock_gettime(CLOCK_MONOTONIC, &to)) {
- error("clock_gettime failed\n", errno);
- return errno;
- }
+TEST(futex_waitv_wouldblock)
+{
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1 + 1,
+ .flags = FUTEX_32,
+ .__reserved = 0,
+ };
+ int res;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ ksft_exit_fail_msg("clock_gettime failed %d\n", errno);
to.tv_nsec += timeout_ns;
@@ -95,17 +67,15 @@ int main(int argc, char *argv[])
to.tv_nsec -= 1000000000;
}
- info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ ksft_print_dbg_msg("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
if (!res || errno != EWOULDBLOCK) {
ksft_test_result_fail("futex_waitv returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
index a94337f677e1..c684b10eb76e 100644
--- a/tools/testing/selftests/futex/functional/futex_waitv.c
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -15,25 +15,16 @@
#include <pthread.h>
#include <stdint.h>
#include <sys/shm.h>
+
#include "futextest.h"
#include "futex2test.h"
-#include "logging.h"
+#include "../../kselftest_harness.h"
-#define TEST_NAME "futex-wait"
#define WAKE_WAIT_US 10000
#define NR_FUTEXES 30
static struct futex_waitv waitv[NR_FUTEXES];
u_int32_t futexes[NR_FUTEXES] = {0};
-void usage(char *prog)
-{
- printf("Usage: %s\n", prog);
- printf(" -c Use color\n");
- printf(" -h Display this help message\n");
- printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
- VQUIET, VCRITICAL, VINFO);
-}
-
void *waiterfn(void *arg)
{
struct timespec to;
@@ -41,7 +32,7 @@ void *waiterfn(void *arg)
/* setting absolute timeout for futex2 */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -57,34 +48,10 @@ void *waiterfn(void *arg)
return NULL;
}
-int main(int argc, char *argv[])
+TEST(private_waitv)
{
pthread_t waiter;
- int res, ret = RET_PASS;
- struct timespec to;
- int c, i;
-
- while ((c = getopt(argc, argv, "cht:v:")) != -1) {
- switch (c) {
- case 'c':
- log_color(1);
- break;
- case 'h':
- usage(basename(argv[0]));
- exit(0);
- case 'v':
- log_verbosity(atoi(optarg));
- break;
- default:
- usage(basename(argv[0]));
- exit(1);
- }
- }
-
- ksft_print_header();
- ksft_set_plan(7);
- ksft_print_msg("%s: Test FUTEX_WAITV\n",
- basename(argv[0]));
+ int res, i;
for (i = 0; i < NR_FUTEXES; i++) {
waitv[i].uaddr = (uintptr_t)&futexes[i];
@@ -95,7 +62,7 @@ int main(int argc, char *argv[])
/* Private waitv */
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
@@ -104,10 +71,15 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv private\n");
}
+}
+
+TEST(shared_waitv)
+{
+ pthread_t waiter;
+ int res, i;
/* Shared waitv */
for (i = 0; i < NR_FUTEXES; i++) {
@@ -128,7 +100,7 @@ int main(int argc, char *argv[])
}
if (pthread_create(&waiter, NULL, waiterfn, NULL))
- error("pthread_create failed\n", errno);
+ ksft_exit_fail_msg("pthread_create failed\n");
usleep(WAKE_WAIT_US);
@@ -137,19 +109,24 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake shared returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv shared\n");
}
for (i = 0; i < NR_FUTEXES; i++)
shmdt(u64_to_ptr(waitv[i].uaddr));
+}
+
+TEST(invalid_flag)
+{
+ struct timespec to;
+ int res;
/* Testing a waiter without FUTEX_32 flag */
waitv[0].flags = FUTEX_PRIVATE_FLAG;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -158,17 +135,22 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv without FUTEX_32\n");
}
+}
+
+TEST(unaligned_address)
+{
+ struct timespec to;
+ int res;
/* Testing a waiter with an unaligned address */
waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
waitv[0].uaddr = 1;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -177,16 +159,21 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_wake private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv with an unaligned address\n");
}
+}
+
+TEST(null_address)
+{
+ struct timespec to;
+ int res;
/* Testing a NULL address for waiters.uaddr */
waitv[0].uaddr = 0x00000000;
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -195,14 +182,13 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
}
/* Testing a NULL address for *waiters */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -211,14 +197,19 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
}
+}
+
+TEST(invalid_clockid)
+{
+ struct timespec to;
+ int res;
/* Testing an invalid clockid */
if (clock_gettime(CLOCK_MONOTONIC, &to))
- error("gettime64 failed\n", errno);
+ ksft_exit_fail_msg("gettime64 failed\n");
to.tv_sec++;
@@ -227,11 +218,9 @@ int main(int argc, char *argv[])
ksft_test_result_fail("futex_waitv private returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
- ret = RET_FAIL;
} else {
ksft_test_result_pass("futex_waitv invalid clockid\n");
}
-
- ksft_print_cnts();
- return ret;
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 81739849f299..e88545c06d57 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -18,74 +18,36 @@
#
###############################################################################
-# Test for a color capable console
-if [ -z "$USE_COLOR" ]; then
- tput setf 7 || tput setaf 7
- if [ $? -eq 0 ]; then
- USE_COLOR=1
- tput sgr0
- fi
-fi
-if [ "$USE_COLOR" -eq 1 ]; then
- COLOR="-c"
-fi
-
-
echo
-# requeue pi testing
-# without timeouts
-./futex_requeue_pi $COLOR
-./futex_requeue_pi $COLOR -b
-./futex_requeue_pi $COLOR -b -l
-./futex_requeue_pi $COLOR -b -o
-./futex_requeue_pi $COLOR -l
-./futex_requeue_pi $COLOR -o
-# with timeouts
-./futex_requeue_pi $COLOR -b -l -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -l -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-./futex_requeue_pi $COLOR -b -t 5000
-./futex_requeue_pi $COLOR -t 5000
-./futex_requeue_pi $COLOR -b -t 500000
-./futex_requeue_pi $COLOR -t 500000
-./futex_requeue_pi $COLOR -b -o -t 5000
-./futex_requeue_pi $COLOR -l -t 5000
-./futex_requeue_pi $COLOR -b -o -t 500000
-./futex_requeue_pi $COLOR -l -t 500000
-# with long timeout
-./futex_requeue_pi $COLOR -b -l -t 2000000000
-./futex_requeue_pi $COLOR -l -t 2000000000
-
+./futex_requeue_pi
echo
-./futex_requeue_pi_mismatched_ops $COLOR
+./futex_requeue_pi_mismatched_ops
echo
-./futex_requeue_pi_signal_restart $COLOR
+./futex_requeue_pi_signal_restart
echo
-./futex_wait_timeout $COLOR
+./futex_wait_timeout
echo
-./futex_wait_wouldblock $COLOR
+./futex_wait_wouldblock
echo
-./futex_wait_uninitialized_heap $COLOR
-./futex_wait_private_mapped_file $COLOR
+./futex_wait_uninitialized_heap
+./futex_wait_private_mapped_file
echo
-./futex_wait $COLOR
+./futex_wait
echo
-./futex_requeue $COLOR
+./futex_requeue
echo
-./futex_waitv $COLOR
+./futex_waitv
echo
-./futex_priv_hash $COLOR
-./futex_priv_hash -g $COLOR
+./futex_priv_hash
echo
-./futex_numa_mpol $COLOR
+./futex_numa_mpol
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
index 7a5fd1d5355e..3d48e9789d9f 100644
--- a/tools/testing/selftests/futex/include/futextest.h
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -58,6 +58,17 @@ typedef volatile u_int32_t futex_t;
#define SYS_futex SYS_futex_time64
#endif
+/*
+ * On 32bit systems if we use "-D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64" or if
+ * we are using a newer compiler then the size of the timestamps will be 64bit,
+ * however, the SYS_futex will still point to the 32bit futex system call.
+ */
+#if __SIZEOF_POINTER__ == 4 && defined(SYS_futex_time64) && \
+ defined(_TIME_BITS) && _TIME_BITS == 64
+# undef SYS_futex
+# define SYS_futex SYS_futex_time64
+#endif
+
/**
* futex() - SYS_futex syscall wrapper
* @uaddr: address of first futex
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
deleted file mode 100644
index 874c69ce5cce..000000000000
--- a/tools/testing/selftests/futex/include/logging.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/******************************************************************************
- *
- * Copyright © International Business Machines Corp., 2009
- *
- * DESCRIPTION
- * Glibc independent futex library for testing kernel functionality.
- *
- * AUTHOR
- * Darren Hart <dvhart@linux.intel.com>
- *
- * HISTORY
- * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
- *
- *****************************************************************************/
-
-#ifndef _LOGGING_H
-#define _LOGGING_H
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <linux/futex.h>
-#include "kselftest.h"
-
-/*
- * Define PASS, ERROR, and FAIL strings with and without color escape
- * sequences, default to no color.
- */
-#define ESC 0x1B, '['
-#define BRIGHT '1'
-#define GREEN '3', '2'
-#define YELLOW '3', '3'
-#define RED '3', '1'
-#define ESCEND 'm'
-#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
-#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
-#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
-#define RESET_COLOR ESC, '0', 'm'
-static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
- RESET_COLOR, 0};
-static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
- RESET_COLOR, 0};
-static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
- RESET_COLOR, 0};
-static const char INFO_NORMAL[] = " INFO";
-static const char PASS_NORMAL[] = " PASS";
-static const char ERROR_NORMAL[] = "ERROR";
-static const char FAIL_NORMAL[] = " FAIL";
-const char *INFO = INFO_NORMAL;
-const char *PASS = PASS_NORMAL;
-const char *ERROR = ERROR_NORMAL;
-const char *FAIL = FAIL_NORMAL;
-
-/* Verbosity setting for INFO messages */
-#define VQUIET 0
-#define VCRITICAL 1
-#define VINFO 2
-#define VMAX VINFO
-int _verbose = VCRITICAL;
-
-/* Functional test return codes */
-#define RET_PASS 0
-#define RET_ERROR -1
-#define RET_FAIL -2
-
-/**
- * log_color() - Use colored output for PASS, ERROR, and FAIL strings
- * @use_color: use color (1) or not (0)
- */
-void log_color(int use_color)
-{
- if (use_color) {
- PASS = PASS_COLOR;
- ERROR = ERROR_COLOR;
- FAIL = FAIL_COLOR;
- } else {
- PASS = PASS_NORMAL;
- ERROR = ERROR_NORMAL;
- FAIL = FAIL_NORMAL;
- }
-}
-
-/**
- * log_verbosity() - Set verbosity of test output
- * @verbose: Enable (1) verbose output or not (0)
- *
- * Currently setting verbose=1 will enable INFO messages and 0 will disable
- * them. FAIL and ERROR messages are always displayed.
- */
-void log_verbosity(int level)
-{
- if (level > VMAX)
- level = VMAX;
- else if (level < 0)
- level = 0;
- _verbose = level;
-}
-
-/**
- * print_result() - Print standard PASS | ERROR | FAIL results
- * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
- *
- * print_result() is primarily intended for functional tests.
- */
-void print_result(const char *test_name, int ret)
-{
- switch (ret) {
- case RET_PASS:
- ksft_test_result_pass("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_ERROR:
- ksft_test_result_error("%s\n", test_name);
- ksft_print_cnts();
- return;
- case RET_FAIL:
- ksft_test_result_fail("%s\n", test_name);
- ksft_print_cnts();
- return;
- }
-}
-
-/* log level macros */
-#define info(message, vargs...) \
-do { \
- if (_verbose >= VINFO) \
- fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
-} while (0)
-
-#define error(message, err, args...) \
-do { \
- if (_verbose >= VCRITICAL) {\
- if (err) \
- fprintf(stderr, "\t%s: %s: "message, \
- ERROR, strerror(err), ##args); \
- else \
- fprintf(stderr, "\t%s: "message, ERROR, ##args); \
- } \
-} while (0)
-
-#define fail(message, args...) \
-do { \
- if (_verbose >= VCRITICAL) \
- fprintf(stderr, "\t%s: "message, FAIL, ##args); \
-} while (0)
-
-#endif
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index 651fc9f13c08..45c14323a618 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -113,7 +113,7 @@ static bool fail_nth_next(struct __test_metadata *_metadata,
* necessarily mean a test failure, just that the limit has to be made
* bigger.
*/
- ASSERT_GT(400, nth_state->iteration);
+ ASSERT_GT(1000, nth_state->iteration);
if (nth_state->iteration != 0) {
ssize_t res;
ssize_t res2;
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index c3b6d2604b1e..8deeb4b72e73 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -54,6 +54,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <sys/utsname.h>
@@ -104,6 +105,7 @@ struct ksft_count {
static struct ksft_count ksft_cnt;
static unsigned int ksft_plan;
+static bool ksft_debug_enabled;
static inline unsigned int ksft_test_num(void)
{
@@ -175,6 +177,18 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
va_end(args);
}
+static inline void ksft_print_dbg_msg(const char *msg, ...)
+{
+ va_list args;
+
+ if (!ksft_debug_enabled)
+ return;
+
+ va_start(args, msg);
+ ksft_print_msg(msg, args);
+ va_end(args);
+}
+
static inline void ksft_perror(const char *msg)
{
ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 2925e47db995..3f66e862e83e 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -751,7 +751,7 @@
for (; _metadata->trigger; _metadata->trigger = \
__bail(_assert, _metadata))
-#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
+#define is_signed_var(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
@@ -759,7 +759,7 @@
__typeof__(_seen) __seen = (_seen); \
if (!(__exp _t __seen)) { \
/* Report with actual signedness to avoid weird output. */ \
- switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
+ switch (is_signed_var(__exp) * 2 + is_signed_var(__seen)) { \
case 0: { \
uintmax_t __exp_print = (uintmax_t)__exp; \
uintmax_t __seen_print = (uintmax_t)__seen; \
@@ -1091,7 +1091,7 @@ static int test_harness_argv_check(int argc, char **argv)
{
int opt;
- while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) {
+ while ((opt = getopt(argc, argv, "dhlF:f:V:v:t:T:r:")) != -1) {
switch (opt) {
case 'f':
case 'F':
@@ -1104,12 +1104,16 @@ static int test_harness_argv_check(int argc, char **argv)
case 'l':
test_harness_list_tests();
return KSFT_SKIP;
+ case 'd':
+ ksft_debug_enabled = true;
+ break;
case 'h':
default:
fprintf(stderr,
- "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n"
+ "Usage: %s [-h|-l|-d] [-t|-T|-v|-V|-f|-F|-r name]\n"
"\t-h print help\n"
"\t-l list all tests\n"
+ "\t-d enable debug prints\n"
"\n"
"\t-t name include test\n"
"\t-T name exclude test\n"
@@ -1142,8 +1146,9 @@ static bool test_enabled(int argc, char **argv,
int opt;
optind = 1;
- while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) {
- has_positive |= islower(opt);
+ while ((opt = getopt(argc, argv, "dF:f:V:v:t:T:r:")) != -1) {
+ if (opt != 'd')
+ has_positive |= islower(opt);
switch (tolower(opt)) {
case 't':
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f6fe7a07a0a2..41b40c676d7f 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -169,6 +169,7 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq
TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
index cef8f7323ceb..713005b6f508 100644
--- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -146,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
return el0 == ID_AA64PFR0_EL1_EL0_IMP;
}
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index e34963956fbc..1d431de8729c 100644
--- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -116,12 +116,12 @@ static void reset_debug_state(void)
/* Reset all bcr/bvr/wcr/wvr registers */
dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+ brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0);
for (i = 0; i <= brps; i++) {
write_dbgbcr(i, 0);
write_dbgbvr(i, 0);
}
- wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+ wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0);
for (i = 0; i <= wrps; i++) {
write_dbgwcr(i, 0);
write_dbgwvr(i, 0);
@@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt)
static int debug_version(uint64_t id_aa64dfr0)
{
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+ return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0);
}
static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
@@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
int b, w, c;
/* Number of breakpoints */
- brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+ brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1;
__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
/* Number of watchpoints */
- wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+ wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1;
/* Number of context aware breakpoints */
- ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+ ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1;
pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
brp_num, wrp_num, ctx_brp_num);
diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
new file mode 100644
index 000000000000..af9581b860f1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that nobody has tampered with KVM's UID
+
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+/*
+ * Do NOT redefine these constants, or try to replace them with some
+ * "common" version. They are hardcoded here to detect any potential
+ * breakage happening in the rest of the kernel.
+ *
+ * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74
+ */
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU
+
+static void guest_code(void)
+{
+ struct arm_smccc_res res = {};
+
+ smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 &&
+ res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 &&
+ res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 &&
+ res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3,
+ "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3);
+ GUEST_DONE();
+}
+
+int main (int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
index ebd70430c89d..f222538e6084 100644
--- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
+++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
@@ -54,7 +54,7 @@ static void guest_code(void)
* Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
* hidden the feature at runtime without any other userspace action.
*/
- __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC,
read_sysreg(id_aa64pfr0_el1)) == 0,
"GICv3 wrongly advertised");
@@ -165,7 +165,7 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, NULL);
pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+ __TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0),
"GICv3 not supported.");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index dc6559dad9d8..4ccbd389d133 100644
--- a/tools/testing/selftests/kvm/arm64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -95,14 +95,14 @@ static bool guest_check_lse(void)
uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
uint64_t atomic;
- atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+ atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0);
return atomic >= 2;
}
static bool guest_check_dc_zva(void)
{
uint64_t dczid = read_sysreg(dczid_el0);
- uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+ uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid);
return dzp == 0;
}
@@ -195,7 +195,7 @@ static bool guest_set_ha(void)
uint64_t hadbs, tcr;
/* Skip if HA is not supported. */
- hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+ hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1);
if (hadbs == 0)
return false;
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index d3bf9204409c..189321e96925 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -243,6 +243,7 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
GUEST_REG_SYNC(SYS_MIDR_EL1);
@@ -594,8 +595,8 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
*/
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
- mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val);
- mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val);
+ mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val);
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n");
@@ -612,7 +613,7 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
}
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
- mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val);
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI)
ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n");
else
@@ -774,7 +775,7 @@ int main(void)
/* Check for AARCH64 only system */
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
ksft_print_header();
diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
index f16b3b27e32e..a0c4ab839155 100644
--- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
@@ -441,7 +441,7 @@ static void create_vpmu_vm(void *guest_code)
/* Make sure that PMUv3 support is indicated in the ID register */
dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
- pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+ pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0);
TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
"Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 9d69904cb608..eb115123d741 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -573,15 +573,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val);
*ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val);
*ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
ID_AA64MMFR0_EL1_TGRAN64_IMP);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val);
*ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index d30625c18259..c744c603d688 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -1554,8 +1554,8 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
}
/* Read from the page to populate the shared zeropage. */
- FORCE_READ(mem);
- FORCE_READ(smem);
+ FORCE_READ(*mem);
+ FORCE_READ(*smem);
fn(mem, smem, pagesize);
munmap:
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index b0d42eb04e3a..8dd81c0a4a5a 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -145,7 +145,7 @@ static bool try_access_buf(char *ptr, bool write)
if (write)
*ptr = 'x';
else
- FORCE_READ(ptr);
+ FORCE_READ(*ptr);
}
signal_jump_set = false;
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index 1afe14b9dc0c..c5940c0595be 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -50,8 +50,10 @@ void read_fault_pages(void *addr, unsigned long nr_pages)
unsigned long i;
for (i = 0; i < nr_pages; i++) {
+ unsigned long *addr2 =
+ ((unsigned long *)(addr + (i * huge_page_size)));
/* Prevent the compiler from optimizing out the entire loop: */
- FORCE_READ(((unsigned long *)(addr + (i * huge_page_size))));
+ FORCE_READ(*addr2);
}
}
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index c5a73617796a..ea945eebec2f 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -110,7 +110,7 @@ void *access_mem(void *ptr)
* the memory access actually happens and prevents the compiler
* from optimizing away this entire loop.
*/
- FORCE_READ((uint64_t *)ptr);
+ FORCE_READ(*(uint64_t *)ptr);
}
return NULL;
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index fccf9e797a0c..5bd52a951cbd 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -5,10 +5,14 @@
#define _GNU_SOURCE
#include <errno.h>
+#include <fcntl.h>
+#include <linux/userfaultfd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <syscall.h>
#include <time.h>
#include <stdbool.h>
@@ -168,6 +172,7 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start,
if (first_val <= start && second_val >= end) {
success = true;
+ fflush(maps_fp);
break;
}
}
@@ -175,6 +180,15 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start,
return success;
}
+/* Check if [ptr, ptr + size) mapped in /proc/self/maps. */
+static bool is_ptr_mapped(FILE *maps_fp, void *ptr, unsigned long size)
+{
+ unsigned long start = (unsigned long)ptr;
+ unsigned long end = start + size;
+
+ return is_range_mapped(maps_fp, start, end);
+}
+
/*
* Returns the start address of the mapping on success, else returns
* NULL on failure.
@@ -733,6 +747,249 @@ out:
dont_unmap ? " [dontunnmap]" : "");
}
+#ifdef __NR_userfaultfd
+static void mremap_move_multi_invalid_vmas(FILE *maps_fp,
+ unsigned long page_size)
+{
+ char *test_name = "mremap move multiple invalid vmas";
+ const size_t size = 10 * page_size;
+ bool success = true;
+ char *ptr, *tgt_ptr;
+ int uffd, err, i;
+ void *res;
+ struct uffdio_api api = {
+ .api = UFFD_API,
+ .features = UFFD_EVENT_PAGEFAULT,
+ };
+
+ uffd = syscall(__NR_userfaultfd, O_NONBLOCK);
+ if (uffd == -1) {
+ err = errno;
+ perror("userfaultfd");
+ if (err == EPERM) {
+ ksft_test_result_skip("%s - missing uffd", test_name);
+ return;
+ }
+ success = false;
+ goto out;
+ }
+ if (ioctl(uffd, UFFDIO_API, &api)) {
+ perror("ioctl UFFDIO_API");
+ success = false;
+ goto out_close_uffd;
+ }
+
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_close_uffd;
+ }
+
+ tgt_ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (tgt_ptr == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_close_uffd;
+ }
+ if (munmap(tgt_ptr, size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap so we end up with:
+ *
+ * 0 2 4 6 8 10 offset in buffer
+ * |*| |*| |*| |*| |*|
+ * |*| |*| |*| |*| |*|
+ *
+ * Additionally, register each with UFFD.
+ */
+ for (i = 0; i < 10; i += 2) {
+ void *unmap_ptr = &ptr[(i + 1) * page_size];
+ unsigned long start = (unsigned long)&ptr[i * page_size];
+ struct uffdio_register reg = {
+ .range = {
+ .start = start,
+ .len = page_size,
+ },
+ .mode = UFFDIO_REGISTER_MODE_MISSING,
+ };
+
+ if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1) {
+ perror("ioctl UFFDIO_REGISTER");
+ success = false;
+ goto out_unmap;
+ }
+ if (munmap(unmap_ptr, page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+ /*
+ * Now try to move the entire range which is invalid for multi VMA move.
+ *
+ * This will fail, and no VMA should be moved, as we check this ahead of
+ * time.
+ */
+ res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ err = errno;
+ if (res != MAP_FAILED) {
+ fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n");
+ success = false;
+ goto out_unmap;
+ }
+ if (err != EFAULT) {
+ errno = err;
+ perror("mrmeap() unexpected error");
+ success = false;
+ goto out_unmap;
+ }
+ if (is_ptr_mapped(maps_fp, tgt_ptr, page_size)) {
+ fprintf(stderr,
+ "Invalid uffd-armed VMA at start of multi range moved\n");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Now try to move a single VMA, this should succeed as not multi VMA
+ * move.
+ */
+ res = mremap(ptr, page_size, page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ if (res == MAP_FAILED) {
+ perror("mremap single invalid-multi VMA");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap the VMA, and remap a non-uffd registered (therefore, multi VMA
+ * move valid) VMA at the start of ptr range.
+ */
+ if (munmap(tgt_ptr, page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ res = mmap(ptr, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ if (res == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Now try to move the entire range, we should succeed in moving the
+ * first VMA, but no others, and report a failure.
+ */
+ res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ err = errno;
+ if (res != MAP_FAILED) {
+ fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n");
+ success = false;
+ goto out_unmap;
+ }
+ if (err != EFAULT) {
+ errno = err;
+ perror("mrmeap() unexpected error");
+ success = false;
+ goto out_unmap;
+ }
+ if (!is_ptr_mapped(maps_fp, tgt_ptr, page_size)) {
+ fprintf(stderr, "Valid VMA not moved\n");
+ success = false;
+ goto out_unmap;
+ }
+
+ /*
+ * Unmap the VMA, and map valid VMA at start of ptr range, and replace
+ * all existing multi-move invalid VMAs, except the last, with valid
+ * multi-move VMAs.
+ */
+ if (munmap(tgt_ptr, page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ if (munmap(ptr, size - 2 * page_size)) {
+ perror("munmap");
+ success = false;
+ goto out_unmap;
+ }
+ for (i = 0; i < 8; i += 2) {
+ res = mmap(&ptr[i * page_size], page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+ if (res == MAP_FAILED) {
+ perror("mmap");
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+ /*
+ * Now try to move the entire range, we should succeed in moving all but
+ * the last VMA, and report a failure.
+ */
+ res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr);
+ err = errno;
+ if (res != MAP_FAILED) {
+ fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n");
+ success = false;
+ goto out_unmap;
+ }
+ if (err != EFAULT) {
+ errno = err;
+ perror("mrmeap() unexpected error");
+ success = false;
+ goto out_unmap;
+ }
+
+ for (i = 0; i < 10; i += 2) {
+ bool is_mapped = is_ptr_mapped(maps_fp,
+ &tgt_ptr[i * page_size], page_size);
+
+ if (i < 8 && !is_mapped) {
+ fprintf(stderr, "Valid VMA not moved at %d\n", i);
+ success = false;
+ goto out_unmap;
+ } else if (i == 8 && is_mapped) {
+ fprintf(stderr, "Invalid VMA moved at %d\n", i);
+ success = false;
+ goto out_unmap;
+ }
+ }
+
+out_unmap:
+ if (munmap(tgt_ptr, size))
+ perror("munmap tgt");
+ if (munmap(ptr, size))
+ perror("munmap src");
+out_close_uffd:
+ close(uffd);
+out:
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+#else
+static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_size)
+{
+ char *test_name = "mremap move multiple invalid vmas";
+
+ ksft_test_result_skip("%s - missing uffd", test_name);
+}
+#endif /* __NR_userfaultfd */
+
/* Returns the time taken for the remap on success else returns -1. */
static long long remap_region(struct config c, unsigned int threshold_mb,
char *rand_addr)
@@ -1074,7 +1331,7 @@ int main(int argc, char **argv)
char *rand_addr;
size_t rand_size;
int num_expand_tests = 2;
- int num_misc_tests = 8;
+ int num_misc_tests = 9;
struct test test_cases[MAX_TEST] = {};
struct test perf_test_cases[MAX_PERF_TEST];
int page_size;
@@ -1197,8 +1454,6 @@ int main(int argc, char **argv)
mremap_expand_merge(maps_fp, page_size);
mremap_expand_merge_offset(maps_fp, page_size);
- fclose(maps_fp);
-
mremap_move_within_range(pattern_seed, rand_addr);
mremap_move_1mb_from_start(pattern_seed, rand_addr);
mremap_shrink_multiple_vmas(page_size, /* inplace= */true);
@@ -1207,6 +1462,9 @@ int main(int argc, char **argv)
mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true);
mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false);
mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true);
+ mremap_move_multi_invalid_vmas(maps_fp, page_size);
+
+ fclose(maps_fp);
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index 0d4209eef0c3..e6face7c0166 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -1525,7 +1525,7 @@ void zeropfn_tests(void)
ret = madvise(mem, hpage_size, MADV_HUGEPAGE);
if (!ret) {
- FORCE_READ(mem);
+ FORCE_READ(*mem);
ret = pagemap_ioctl(mem, hpage_size, &vec, 1, 0,
0, PAGE_IS_PFNZERO, 0, 0, PAGE_IS_PFNZERO);
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 05de1fc0005b..44a3f8a58806 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -439,8 +439,11 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
}
madvise(*addr, fd_size, MADV_HUGEPAGE);
- for (size_t i = 0; i < fd_size; i++)
- FORCE_READ((*addr + i));
+ for (size_t i = 0; i < fd_size; i++) {
+ char *addr2 = *addr + i;
+
+ FORCE_READ(*addr2);
+ }
if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index c20298ae98ea..b55d1809debc 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -23,7 +23,7 @@
* anything with it in order to trigger a read page fault. We therefore must use
* volatile to stop the compiler from optimising this away.
*/
-#define FORCE_READ(x) (*(volatile typeof(x) *)x)
+#define FORCE_READ(x) (*(const volatile typeof(x) *)&(x))
extern unsigned int __page_size;
extern unsigned int __page_shift;
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index b1e4618399be..a688871a98eb 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -107,6 +107,26 @@
#endif
#endif
+#ifndef __NR_open_tree_attr
+ #if defined __alpha__
+ #define __NR_open_tree_attr 577
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree_attr (467 + 4000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree_attr (467 + 6000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree_attr (467 + 5000)
+ #endif
+ #elif defined __ia64__
+ #define __NR_open_tree_attr (467 + 1024)
+ #else
+ #define __NR_open_tree_attr 467
+ #endif
+#endif
+
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
@@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag
return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
}
+static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags,
+ struct mount_attr *attr, size_t size)
+{
+ return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size);
+}
+
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
ssize_t ret;
@@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
attr.userns_fd = get_userns_fd(0, 10000, 10000);
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
@@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
@@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
ASSERT_EQ(close(open_tree_fd), 0);
}
+static bool expected_uid_gid(int dfd, const char *path, int flags,
+ uid_t expected_uid, gid_t expected_gid)
+{
+ int ret;
+ struct stat st;
+
+ ret = fstatat(dfd, path, &st, flags);
+ if (ret < 0)
+ return false;
+
+ return st.st_uid == expected_uid && st.st_gid == expected_gid;
+}
+
/**
* Validate that currently changing the idmapping of an idmapped mount fails.
*/
@@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping)
.attr_set = MOUNT_ATTR_IDMAP,
};
+ ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0));
+
if (!mount_setattr_supported())
SKIP(return, "mount_setattr syscall not supported");
@@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping)
AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
ASSERT_EQ(close(attr.userns_fd), 0);
+ EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0));
+ EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
+
/* Change idmapping on a detached mount that is already idmapped. */
attr.userns_fd = get_userns_fd(0, 20000, 10000);
ASSERT_GE(attr.userns_fd, 0);
ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ /*
+ * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way
+ * to bypass this mount_setattr() restriction.
+ */
+ EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000));
+ EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000));
+
ASSERT_EQ(close(attr.userns_fd), 0);
ASSERT_EQ(close(open_tree_fd), 0);
}
-static bool expected_uid_gid(int dfd, const char *path, int flags,
- uid_t expected_uid, gid_t expected_gid)
-{
- int ret;
- struct stat st;
-
- ret = fstatat(dfd, path, &st, flags);
- if (ret < 0)
- return false;
-
- return st.st_uid == expected_uid && st.st_gid == expected_gid;
-}
-
TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
{
int open_tree_fd = -EBADF;
diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/selftests/namespaces/.gitignore
new file mode 100644
index 000000000000..ccfb40837a73
--- /dev/null
+++ b/tools/testing/selftests/namespaces/.gitignore
@@ -0,0 +1,3 @@
+nsid_test
+file_handle_test
+init_ino_test
diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/selftests/namespaces/Makefile
new file mode 100644
index 000000000000..5fe4b3dc07d3
--- /dev/null
+++ b/tools/testing/selftests/namespaces/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+
+TEST_GEN_PROGS := nsid_test file_handle_test init_ino_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/namespaces/config b/tools/testing/selftests/namespaces/config
new file mode 100644
index 000000000000..d09836260262
--- /dev/null
+++ b/tools/testing/selftests/namespaces/config
@@ -0,0 +1,7 @@
+CONFIG_UTS_NS=y
+CONFIG_TIME_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/namespaces/file_handle_test.c b/tools/testing/selftests/namespaces/file_handle_test.c
new file mode 100644
index 000000000000..f1bc5773f552
--- /dev/null
+++ b/tools/testing/selftests/namespaces/file_handle_test.c
@@ -0,0 +1,1429 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include "../kselftest_harness.h"
+
+#ifndef FD_NSFS_ROOT
+#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */
+#endif
+
+TEST(nsfs_net_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open a namespace file descriptor */
+ ns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT as unprivileged user */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ if (fd < 0 && errno == EPERM) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "Permission denied for unprivileged user (expected)");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_uts_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open UTS namespace file descriptor */
+ ns_fd = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_ipc_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open IPC namespace file descriptor */
+ ns_fd = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_pid_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open PID namespace file descriptor */
+ ns_fd = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_mnt_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open mount namespace file descriptor */
+ ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_user_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open user namespace file descriptor */
+ ns_fd = open("/proc/self/ns/user", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_cgroup_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open cgroup namespace file descriptor */
+ ns_fd = open("/proc/self/ns/cgroup", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); return, "cgroup namespace not available");
+ }
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_time_handle)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ struct stat st1, st2;
+
+ /* Drop to unprivileged uid/gid */
+ ASSERT_EQ(setresgid(65534, 65534, 65534), 0); /* nogroup */
+ ASSERT_EQ(setresuid(65534, 65534, 65534), 0); /* nobody */
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open time namespace file descriptor */
+ ns_fd = open("/proc/self/ns/time", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); return, "time namespace not available");
+ }
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Try to open using FD_NSFS_ROOT */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+ if (fd < 0 && (errno == EINVAL || errno == EOPNOTSUPP)) {
+ SKIP(free(handle); close(ns_fd);
+ return,
+ "open_by_handle_at with FD_NSFS_ROOT not supported");
+ }
+ ASSERT_GE(fd, 0);
+
+ /* Verify we opened the correct namespace */
+ ASSERT_EQ(fstat(ns_fd, &st1), 0);
+ ASSERT_EQ(fstat(fd, &st2), 0);
+ ASSERT_EQ(st1.st_ino, st2.st_ino);
+ ASSERT_EQ(st1.st_dev, st2.st_dev);
+
+ close(fd);
+ close(ns_fd);
+ free(handle);
+}
+
+TEST(nsfs_user_net_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current network namespace */
+ ns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create network namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's network namespace handle from new user+net namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new network namespace");
+ }
+
+ /* Should fail with permission denied since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_uts_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current UTS namespace */
+ ns_fd = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new UTS namespace */
+ ret = unshare(CLONE_NEWUTS);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create UTS namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's UTS namespace handle from new user+uts namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new UTS namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_ipc_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current IPC namespace */
+ ns_fd = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new IPC namespace */
+ ret = unshare(CLONE_NEWIPC);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create IPC namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's IPC namespace handle from new user+ipc namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new IPC namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_mnt_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current mount namespace */
+ ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new mount namespace */
+ ret = unshare(CLONE_NEWNS);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create mount namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's mount namespace handle from new user+mnt namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new mount namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_cgroup_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current cgroup namespace */
+ ns_fd = open("/proc/self/ns/cgroup", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); close(pipefd[0]); close(pipefd[1]);
+ return, "cgroup namespace not available");
+ }
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new cgroup namespace */
+ ret = unshare(CLONE_NEWCGROUP);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create cgroup namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Try to open parent's cgroup namespace handle from new user+cgroup namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S", 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new cgroup namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_pid_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current PID namespace */
+ ns_fd = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new PID namespace - requires fork to take effect */
+ ret = unshare(CLONE_NEWPID);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create PID namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Fork again for PID namespace to take effect */
+ pid_t child_pid = fork();
+ if (child_pid < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to fork in PID namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ if (child_pid == 0) {
+ /* Grandchild in new PID namespace */
+ /* Try to open parent's PID namespace handle from new user+pid namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S",
+ 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Wait for grandchild */
+ waitpid(child_pid, NULL, 0);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new PID namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_user_time_namespace_isolation)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+ pid_t pid;
+ int status;
+ int pipefd[2];
+ char result;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Create pipe for communication */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ /* Get handle for current time namespace */
+ ns_fd = open("/proc/self/ns/time", O_RDONLY);
+ if (ns_fd < 0) {
+ SKIP(free(handle); close(pipefd[0]); close(pipefd[1]);
+ return, "time namespace not available");
+ }
+
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd); close(pipefd[0]);
+ close(pipefd[1]);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ close(ns_fd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* First create new user namespace to drop privileges */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ write(pipefd[1], "U",
+ 1); /* Unable to create user namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Write uid/gid mappings to maintain some capabilities */
+ int uid_map_fd = open("/proc/self/uid_map", O_WRONLY);
+ int gid_map_fd = open("/proc/self/gid_map", O_WRONLY);
+ int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
+
+ if (uid_map_fd < 0 || gid_map_fd < 0 || setgroups_fd < 0) {
+ write(pipefd[1], "M", 1); /* Unable to set mappings */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Disable setgroups to allow gid mapping */
+ write(setgroups_fd, "deny", 4);
+ close(setgroups_fd);
+
+ /* Map current uid/gid to root in the new namespace */
+ char mapping[64];
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getuid());
+ write(uid_map_fd, mapping, strlen(mapping));
+ close(uid_map_fd);
+
+ snprintf(mapping, sizeof(mapping), "0 %d 1", getgid());
+ write(gid_map_fd, mapping, strlen(mapping));
+ close(gid_map_fd);
+
+ /* Now create new time namespace - requires fork to take effect */
+ ret = unshare(CLONE_NEWTIME);
+ if (ret < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to create time namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Fork again for time namespace to take effect */
+ pid_t child_pid = fork();
+ if (child_pid < 0) {
+ write(pipefd[1], "N",
+ 1); /* Unable to fork in time namespace */
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ if (child_pid == 0) {
+ /* Grandchild in new time namespace */
+ /* Try to open parent's time namespace handle from new user+time namespace */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDONLY);
+
+ if (fd >= 0) {
+ /* Should NOT succeed - we're in a different user namespace */
+ write(pipefd[1], "S",
+ 1); /* Unexpected success */
+ close(fd);
+ } else if (errno == ESTALE) {
+ /* Expected: Stale file handle */
+ write(pipefd[1], "P", 1);
+ } else {
+ /* Other error */
+ write(pipefd[1], "F", 1);
+ }
+
+ close(pipefd[1]);
+ exit(0);
+ }
+
+ /* Wait for grandchild */
+ waitpid(child_pid, NULL, 0);
+ exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+ ASSERT_EQ(read(pipefd[0], &result, 1), 1);
+
+ waitpid(pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(WEXITSTATUS(status), 0);
+
+ if (result == 'U') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new user namespace");
+ }
+ if (result == 'M') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot set uid/gid mappings");
+ }
+ if (result == 'N') {
+ SKIP(free(handle); close(pipefd[0]);
+ return, "Cannot create new time namespace");
+ }
+
+ /* Should fail with ESTALE since we're in a different user namespace */
+ ASSERT_EQ(result, 'P');
+
+ close(pipefd[0]);
+ free(handle);
+}
+
+TEST(nsfs_open_flags)
+{
+ struct file_handle *handle;
+ int mount_id;
+ int ret;
+ int fd;
+ int ns_fd;
+
+ handle = malloc(sizeof(*handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(handle, NULL);
+
+ /* Open a namespace file descriptor */
+ ns_fd = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(ns_fd, 0);
+
+ /* Get handle for the namespace */
+ handle->handle_bytes = MAX_HANDLE_SZ;
+ ret = name_to_handle_at(ns_fd, "", handle, &mount_id, AT_EMPTY_PATH);
+ if (ret < 0 && errno == EOPNOTSUPP) {
+ SKIP(free(handle); close(ns_fd);
+ return, "nsfs doesn't support file handles");
+ }
+ ASSERT_EQ(ret, 0);
+ ASSERT_GT(handle->handle_bytes, 0);
+
+ /* Test invalid flags that should fail */
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_WRONLY);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EPERM);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_RDWR);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EPERM);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TRUNC);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EPERM);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECT);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_TMPFILE);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ fd = open_by_handle_at(FD_NSFS_ROOT, handle, O_DIRECTORY);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, ENOTDIR);
+
+ close(ns_fd);
+ free(handle);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/init_ino_test.c b/tools/testing/selftests/namespaces/init_ino_test.c
new file mode 100644
index 000000000000..5b6993c3740b
--- /dev/null
+++ b/tools/testing/selftests/namespaces/init_ino_test.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/nsfs.h>
+
+#include "../kselftest_harness.h"
+
+struct ns_info {
+ const char *name;
+ const char *proc_path;
+ unsigned int expected_ino;
+};
+
+static struct ns_info namespaces[] = {
+ { "ipc", "/proc/1/ns/ipc", IPC_NS_INIT_INO },
+ { "uts", "/proc/1/ns/uts", UTS_NS_INIT_INO },
+ { "user", "/proc/1/ns/user", USER_NS_INIT_INO },
+ { "pid", "/proc/1/ns/pid", PID_NS_INIT_INO },
+ { "cgroup", "/proc/1/ns/cgroup", CGROUP_NS_INIT_INO },
+ { "time", "/proc/1/ns/time", TIME_NS_INIT_INO },
+ { "net", "/proc/1/ns/net", NET_NS_INIT_INO },
+ { "mnt", "/proc/1/ns/mnt", MNT_NS_INIT_INO },
+};
+
+TEST(init_namespace_inodes)
+{
+ struct stat st;
+
+ for (int i = 0; i < sizeof(namespaces) / sizeof(namespaces[0]); i++) {
+ int ret = stat(namespaces[i].proc_path, &st);
+
+ /* Some namespaces might not be available (e.g., time namespace on older kernels) */
+ if (ret < 0) {
+ if (errno == ENOENT) {
+ ksft_test_result_skip("%s namespace not available\n",
+ namespaces[i].name);
+ continue;
+ }
+ ASSERT_GE(ret, 0)
+ TH_LOG("Failed to stat %s: %s",
+ namespaces[i].proc_path, strerror(errno));
+ }
+
+ ASSERT_EQ(st.st_ino, namespaces[i].expected_ino)
+ TH_LOG("Namespace %s has inode 0x%lx, expected 0x%x",
+ namespaces[i].name, st.st_ino, namespaces[i].expected_ino);
+
+ ksft_print_msg("Namespace %s: inode 0x%lx matches expected 0x%x\n",
+ namespaces[i].name, st.st_ino, namespaces[i].expected_ino);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c
new file mode 100644
index 000000000000..e28accd74a57
--- /dev/null
+++ b/tools/testing/selftests/namespaces/nsid_test.c
@@ -0,0 +1,986 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <limits.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <poll.h>
+#include <sys/epoll.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/nsfs.h>
+#include "../kselftest_harness.h"
+
+TEST(nsid_mntns_basic)
+{
+ __u64 mnt_ns_id = 0;
+ int fd_mntns;
+ int ret;
+
+ /* Open the current mount namespace */
+ fd_mntns = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(fd_mntns, 0);
+
+ /* Get the mount namespace ID */
+ ret = ioctl(fd_mntns, NS_GET_MNTNS_ID, &mnt_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(mnt_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 mnt_ns_id2 = 0;
+ ret = ioctl(fd_mntns, NS_GET_ID, &mnt_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(mnt_ns_id, mnt_ns_id2);
+
+ close(fd_mntns);
+}
+
+TEST(nsid_mntns_separate)
+{
+ __u64 parent_mnt_ns_id = 0;
+ __u64 child_mnt_ns_id = 0;
+ int fd_parent_mntns, fd_child_mntns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's mount namespace ID */
+ fd_parent_mntns = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(fd_parent_mntns, 0);
+ ret = ioctl(fd_parent_mntns, NS_GET_ID, &parent_mnt_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_mnt_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new mount namespace */
+ ret = unshare(CLONE_NEWNS);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_mntns);
+ SKIP(return, "No permission to create mount namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's mount namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);
+ fd_child_mntns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_mntns, 0);
+
+ /* Get child's mount namespace ID */
+ ret = ioctl(fd_child_mntns, NS_GET_ID, &child_mnt_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_mnt_ns_id, 0);
+
+ /* Parent and child should have different mount namespace IDs */
+ ASSERT_NE(parent_mnt_ns_id, child_mnt_ns_id);
+
+ close(fd_parent_mntns);
+ close(fd_child_mntns);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST(nsid_cgroupns_basic)
+{
+ __u64 cgroup_ns_id = 0;
+ int fd_cgroupns;
+ int ret;
+
+ /* Open the current cgroup namespace */
+ fd_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY);
+ ASSERT_GE(fd_cgroupns, 0);
+
+ /* Get the cgroup namespace ID */
+ ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(cgroup_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 cgroup_ns_id2 = 0;
+ ret = ioctl(fd_cgroupns, NS_GET_ID, &cgroup_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(cgroup_ns_id, cgroup_ns_id2);
+
+ close(fd_cgroupns);
+}
+
+TEST(nsid_cgroupns_separate)
+{
+ __u64 parent_cgroup_ns_id = 0;
+ __u64 child_cgroup_ns_id = 0;
+ int fd_parent_cgroupns, fd_child_cgroupns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's cgroup namespace ID */
+ fd_parent_cgroupns = open("/proc/self/ns/cgroup", O_RDONLY);
+ ASSERT_GE(fd_parent_cgroupns, 0);
+ ret = ioctl(fd_parent_cgroupns, NS_GET_ID, &parent_cgroup_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_cgroup_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new cgroup namespace */
+ ret = unshare(CLONE_NEWCGROUP);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_cgroupns);
+ SKIP(return, "No permission to create cgroup namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's cgroup namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/cgroup", pid);
+ fd_child_cgroupns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_cgroupns, 0);
+
+ /* Get child's cgroup namespace ID */
+ ret = ioctl(fd_child_cgroupns, NS_GET_ID, &child_cgroup_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_cgroup_ns_id, 0);
+
+ /* Parent and child should have different cgroup namespace IDs */
+ ASSERT_NE(parent_cgroup_ns_id, child_cgroup_ns_id);
+
+ close(fd_parent_cgroupns);
+ close(fd_child_cgroupns);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST(nsid_ipcns_basic)
+{
+ __u64 ipc_ns_id = 0;
+ int fd_ipcns;
+ int ret;
+
+ /* Open the current IPC namespace */
+ fd_ipcns = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(fd_ipcns, 0);
+
+ /* Get the IPC namespace ID */
+ ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(ipc_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 ipc_ns_id2 = 0;
+ ret = ioctl(fd_ipcns, NS_GET_ID, &ipc_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(ipc_ns_id, ipc_ns_id2);
+
+ close(fd_ipcns);
+}
+
+TEST(nsid_ipcns_separate)
+{
+ __u64 parent_ipc_ns_id = 0;
+ __u64 child_ipc_ns_id = 0;
+ int fd_parent_ipcns, fd_child_ipcns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's IPC namespace ID */
+ fd_parent_ipcns = open("/proc/self/ns/ipc", O_RDONLY);
+ ASSERT_GE(fd_parent_ipcns, 0);
+ ret = ioctl(fd_parent_ipcns, NS_GET_ID, &parent_ipc_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_ipc_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new IPC namespace */
+ ret = unshare(CLONE_NEWIPC);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_ipcns);
+ SKIP(return, "No permission to create IPC namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's IPC namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/ipc", pid);
+ fd_child_ipcns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_ipcns, 0);
+
+ /* Get child's IPC namespace ID */
+ ret = ioctl(fd_child_ipcns, NS_GET_ID, &child_ipc_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_ipc_ns_id, 0);
+
+ /* Parent and child should have different IPC namespace IDs */
+ ASSERT_NE(parent_ipc_ns_id, child_ipc_ns_id);
+
+ close(fd_parent_ipcns);
+ close(fd_child_ipcns);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST(nsid_utsns_basic)
+{
+ __u64 uts_ns_id = 0;
+ int fd_utsns;
+ int ret;
+
+ /* Open the current UTS namespace */
+ fd_utsns = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(fd_utsns, 0);
+
+ /* Get the UTS namespace ID */
+ ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(uts_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 uts_ns_id2 = 0;
+ ret = ioctl(fd_utsns, NS_GET_ID, &uts_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(uts_ns_id, uts_ns_id2);
+
+ close(fd_utsns);
+}
+
+TEST(nsid_utsns_separate)
+{
+ __u64 parent_uts_ns_id = 0;
+ __u64 child_uts_ns_id = 0;
+ int fd_parent_utsns, fd_child_utsns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's UTS namespace ID */
+ fd_parent_utsns = open("/proc/self/ns/uts", O_RDONLY);
+ ASSERT_GE(fd_parent_utsns, 0);
+ ret = ioctl(fd_parent_utsns, NS_GET_ID, &parent_uts_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_uts_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new UTS namespace */
+ ret = unshare(CLONE_NEWUTS);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_utsns);
+ SKIP(return, "No permission to create UTS namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's UTS namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+ fd_child_utsns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_utsns, 0);
+
+ /* Get child's UTS namespace ID */
+ ret = ioctl(fd_child_utsns, NS_GET_ID, &child_uts_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_uts_ns_id, 0);
+
+ /* Parent and child should have different UTS namespace IDs */
+ ASSERT_NE(parent_uts_ns_id, child_uts_ns_id);
+
+ close(fd_parent_utsns);
+ close(fd_child_utsns);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST(nsid_userns_basic)
+{
+ __u64 user_ns_id = 0;
+ int fd_userns;
+ int ret;
+
+ /* Open the current user namespace */
+ fd_userns = open("/proc/self/ns/user", O_RDONLY);
+ ASSERT_GE(fd_userns, 0);
+
+ /* Get the user namespace ID */
+ ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(user_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 user_ns_id2 = 0;
+ ret = ioctl(fd_userns, NS_GET_ID, &user_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(user_ns_id, user_ns_id2);
+
+ close(fd_userns);
+}
+
+TEST(nsid_userns_separate)
+{
+ __u64 parent_user_ns_id = 0;
+ __u64 child_user_ns_id = 0;
+ int fd_parent_userns, fd_child_userns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's user namespace ID */
+ fd_parent_userns = open("/proc/self/ns/user", O_RDONLY);
+ ASSERT_GE(fd_parent_userns, 0);
+ ret = ioctl(fd_parent_userns, NS_GET_ID, &parent_user_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_user_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new user namespace */
+ ret = unshare(CLONE_NEWUSER);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_userns);
+ SKIP(return, "No permission to create user namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's user namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ fd_child_userns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_userns, 0);
+
+ /* Get child's user namespace ID */
+ ret = ioctl(fd_child_userns, NS_GET_ID, &child_user_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_user_ns_id, 0);
+
+ /* Parent and child should have different user namespace IDs */
+ ASSERT_NE(parent_user_ns_id, child_user_ns_id);
+
+ close(fd_parent_userns);
+ close(fd_child_userns);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST(nsid_timens_basic)
+{
+ __u64 time_ns_id = 0;
+ int fd_timens;
+ int ret;
+
+ /* Open the current time namespace */
+ fd_timens = open("/proc/self/ns/time", O_RDONLY);
+ if (fd_timens < 0) {
+ SKIP(return, "Time namespaces not supported");
+ }
+
+ /* Get the time namespace ID */
+ ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(time_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 time_ns_id2 = 0;
+ ret = ioctl(fd_timens, NS_GET_ID, &time_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(time_ns_id, time_ns_id2);
+
+ close(fd_timens);
+}
+
+TEST(nsid_timens_separate)
+{
+ __u64 parent_time_ns_id = 0;
+ __u64 child_time_ns_id = 0;
+ int fd_parent_timens, fd_child_timens;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Open the current time namespace */
+ fd_parent_timens = open("/proc/self/ns/time", O_RDONLY);
+ if (fd_parent_timens < 0) {
+ SKIP(return, "Time namespaces not supported");
+ }
+
+ /* Get parent's time namespace ID */
+ ret = ioctl(fd_parent_timens, NS_GET_ID, &parent_time_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_time_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new time namespace */
+ ret = unshare(CLONE_NEWTIME);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES || errno == EINVAL) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Fork a grandchild to actually enter the new namespace */
+ pid_t grandchild = fork();
+ if (grandchild == 0) {
+ /* Grandchild is in the new namespace */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+ pause();
+ _exit(0);
+ } else if (grandchild > 0) {
+ /* Child writes grandchild PID and waits */
+ write(pipefd[1], "Y", 1);
+ write(pipefd[1], &grandchild, sizeof(grandchild));
+ close(pipefd[1]);
+ pause(); /* Keep the parent alive to maintain the grandchild */
+ _exit(0);
+ } else {
+ _exit(1);
+ }
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_timens);
+ close(pipefd[0]);
+ SKIP(return, "Cannot create time namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ pid_t grandchild_pid;
+ ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid));
+ close(pipefd[0]);
+
+ /* Open grandchild's time namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/time", grandchild_pid);
+ fd_child_timens = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_timens, 0);
+
+ /* Get child's time namespace ID */
+ ret = ioctl(fd_child_timens, NS_GET_ID, &child_time_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_time_ns_id, 0);
+
+ /* Parent and child should have different time namespace IDs */
+ ASSERT_NE(parent_time_ns_id, child_time_ns_id);
+
+ close(fd_parent_timens);
+ close(fd_child_timens);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST(nsid_pidns_basic)
+{
+ __u64 pid_ns_id = 0;
+ int fd_pidns;
+ int ret;
+
+ /* Open the current PID namespace */
+ fd_pidns = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(fd_pidns, 0);
+
+ /* Get the PID namespace ID */
+ ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(pid_ns_id, 0);
+
+ /* Verify we can get the same ID again */
+ __u64 pid_ns_id2 = 0;
+ ret = ioctl(fd_pidns, NS_GET_ID, &pid_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(pid_ns_id, pid_ns_id2);
+
+ close(fd_pidns);
+}
+
+TEST(nsid_pidns_separate)
+{
+ __u64 parent_pid_ns_id = 0;
+ __u64 child_pid_ns_id = 0;
+ int fd_parent_pidns, fd_child_pidns;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's PID namespace ID */
+ fd_parent_pidns = open("/proc/self/ns/pid", O_RDONLY);
+ ASSERT_GE(fd_parent_pidns, 0);
+ ret = ioctl(fd_parent_pidns, NS_GET_ID, &parent_pid_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_pid_ns_id, 0);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new PID namespace */
+ ret = unshare(CLONE_NEWPID);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Fork a grandchild to actually enter the new namespace */
+ pid_t grandchild = fork();
+ if (grandchild == 0) {
+ /* Grandchild is in the new namespace */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+ pause();
+ _exit(0);
+ } else if (grandchild > 0) {
+ /* Child writes grandchild PID and waits */
+ write(pipefd[1], "Y", 1);
+ write(pipefd[1], &grandchild, sizeof(grandchild));
+ close(pipefd[1]);
+ pause(); /* Keep the parent alive to maintain the grandchild */
+ _exit(0);
+ } else {
+ _exit(1);
+ }
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_pidns);
+ close(pipefd[0]);
+ SKIP(return, "No permission to create PID namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ pid_t grandchild_pid;
+ ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid));
+ close(pipefd[0]);
+
+ /* Open grandchild's PID namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/pid", grandchild_pid);
+ fd_child_pidns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_pidns, 0);
+
+ /* Get child's PID namespace ID */
+ ret = ioctl(fd_child_pidns, NS_GET_ID, &child_pid_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_pid_ns_id, 0);
+
+ /* Parent and child should have different PID namespace IDs */
+ ASSERT_NE(parent_pid_ns_id, child_pid_ns_id);
+
+ close(fd_parent_pidns);
+ close(fd_child_pidns);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST(nsid_netns_basic)
+{
+ __u64 net_ns_id = 0;
+ __u64 netns_cookie = 0;
+ int fd_netns;
+ int sock;
+ socklen_t optlen;
+ int ret;
+
+ /* Open the current network namespace */
+ fd_netns = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(fd_netns, 0);
+
+ /* Get the network namespace ID via ioctl */
+ ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(net_ns_id, 0);
+
+ /* Create a socket to get the SO_NETNS_COOKIE */
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(sock, 0);
+
+ /* Get the network namespace cookie via socket option */
+ optlen = sizeof(netns_cookie);
+ ret = getsockopt(sock, SOL_SOCKET, SO_NETNS_COOKIE, &netns_cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(optlen, sizeof(netns_cookie));
+
+ /* The namespace ID and cookie should be identical */
+ ASSERT_EQ(net_ns_id, netns_cookie);
+
+ /* Verify we can get the same ID again */
+ __u64 net_ns_id2 = 0;
+ ret = ioctl(fd_netns, NS_GET_ID, &net_ns_id2);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(net_ns_id, net_ns_id2);
+
+ close(sock);
+ close(fd_netns);
+}
+
+TEST(nsid_netns_separate)
+{
+ __u64 parent_net_ns_id = 0;
+ __u64 parent_netns_cookie = 0;
+ __u64 child_net_ns_id = 0;
+ __u64 child_netns_cookie = 0;
+ int fd_parent_netns, fd_child_netns;
+ int parent_sock, child_sock;
+ socklen_t optlen;
+ int ret;
+ pid_t pid;
+ int pipefd[2];
+
+ /* Get parent's network namespace ID */
+ fd_parent_netns = open("/proc/self/ns/net", O_RDONLY);
+ ASSERT_GE(fd_parent_netns, 0);
+ ret = ioctl(fd_parent_netns, NS_GET_ID, &parent_net_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(parent_net_ns_id, 0);
+
+ /* Get parent's network namespace cookie */
+ parent_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(parent_sock, 0);
+ optlen = sizeof(parent_netns_cookie);
+ ret = getsockopt(parent_sock, SOL_SOCKET, SO_NETNS_COOKIE, &parent_netns_cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+
+ /* Verify parent's ID and cookie match */
+ ASSERT_EQ(parent_net_ns_id, parent_netns_cookie);
+
+ /* Create a pipe for synchronization */
+ ASSERT_EQ(pipe(pipefd), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* Child process */
+ close(pipefd[0]);
+
+ /* Create new network namespace */
+ ret = unshare(CLONE_NEWNET);
+ if (ret != 0) {
+ /* Skip test if we don't have permission */
+ if (errno == EPERM || errno == EACCES) {
+ write(pipefd[1], "S", 1); /* Signal skip */
+ _exit(0);
+ }
+ _exit(1);
+ }
+
+ /* Signal success */
+ write(pipefd[1], "Y", 1);
+ close(pipefd[1]);
+
+ /* Keep namespace alive */
+ pause();
+ _exit(0);
+ }
+
+ /* Parent process */
+ close(pipefd[1]);
+
+ char buf;
+ ASSERT_EQ(read(pipefd[0], &buf, 1), 1);
+ close(pipefd[0]);
+
+ if (buf == 'S') {
+ /* Child couldn't create namespace, skip test */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+ close(fd_parent_netns);
+ close(parent_sock);
+ SKIP(return, "No permission to create network namespace");
+ }
+
+ ASSERT_EQ(buf, 'Y');
+
+ /* Open child's network namespace */
+ char path[256];
+ snprintf(path, sizeof(path), "/proc/%d/ns/net", pid);
+ fd_child_netns = open(path, O_RDONLY);
+ ASSERT_GE(fd_child_netns, 0);
+
+ /* Get child's network namespace ID */
+ ret = ioctl(fd_child_netns, NS_GET_ID, &child_net_ns_id);
+ ASSERT_EQ(ret, 0);
+ ASSERT_NE(child_net_ns_id, 0);
+
+ /* Create socket in child's namespace to get cookie */
+ ret = setns(fd_child_netns, CLONE_NEWNET);
+ if (ret == 0) {
+ child_sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(child_sock, 0);
+
+ optlen = sizeof(child_netns_cookie);
+ ret = getsockopt(child_sock, SOL_SOCKET, SO_NETNS_COOKIE, &child_netns_cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+
+ /* Verify child's ID and cookie match */
+ ASSERT_EQ(child_net_ns_id, child_netns_cookie);
+
+ close(child_sock);
+
+ /* Return to parent namespace */
+ setns(fd_parent_netns, CLONE_NEWNET);
+ }
+
+ /* Parent and child should have different network namespace IDs */
+ ASSERT_NE(parent_net_ns_id, child_net_ns_id);
+ if (child_netns_cookie != 0) {
+ ASSERT_NE(parent_netns_cookie, child_netns_cookie);
+ }
+
+ close(fd_parent_netns);
+ close(fd_child_netns);
+ close(parent_sock);
+
+ /* Clean up child process */
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b31a71f2b372..2b31d4a93ad7 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -99,6 +99,7 @@ TEST_GEN_PROGS += bind_wildcard
TEST_GEN_PROGS += bind_timewait
TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
+TEST_PROGS += test_vxlan_nh.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += test_neigh.sh
@@ -115,6 +116,7 @@ TEST_PROGS += skf_net_off.sh
TEST_GEN_FILES += skf_net_off
TEST_GEN_FILES += tfo
TEST_PROGS += tfo_passive.sh
+TEST_PROGS += broadcast_ether_dst.sh
TEST_PROGS += broadcast_pmtu.sh
TEST_PROGS += ipv6_force_forwarding.sh
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
index 57ff67a3751e..da04b0b19b73 100644
--- a/tools/testing/selftests/net/bind_bhash.c
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -75,7 +75,7 @@ static void *setup(void *arg)
int *array = (int *)arg;
for (i = 0; i < MAX_CONNECTIONS; i++) {
- sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ sock_fd = bind_socket(SO_REUSEPORT, setup_addr);
if (sock_fd < 0) {
ret = sock_fd;
pthread_exit(&ret);
@@ -103,7 +103,7 @@ int main(int argc, const char *argv[])
setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
- listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ listener_fd = bind_socket(SO_REUSEPORT, setup_addr);
if (listen(listener_fd, 100) < 0) {
perror("listen failed");
return -1;
diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh
new file mode 100755
index 000000000000..334a7eca8a80
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_ether_dst.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Author: Brett A C Sheffield <bacs@librecast.net>
+# Author: Oscar Maes <oscmaes92@gmail.com>
+#
+# Ensure destination ethernet field is correctly set for
+# broadcast packets
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+GW_IP4="192.168.0.2"
+
+setup() {
+ setup_ns CLIENT_NS SERVER_NS
+
+ ip -net "${SERVER_NS}" link add link1 type veth \
+ peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+
+ ip -net "${CLIENT_NS}" route add default via "${GW_IP4}"
+ ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55
+}
+
+cleanup() {
+ rm -f "${CAPFILE}" "${OUTPUT}"
+ ip -net "${SERVER_NS}" link del link1
+ cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+test_broadcast_ether_dst() {
+ local rc=0
+ CAPFILE=$(mktemp -u cap.XXXXXXXXXX)
+ OUTPUT=$(mktemp -u out.XXXXXXXXXX)
+
+ echo "Testing ethernet broadcast destination"
+
+ # start tcpdump listening for icmp
+ # tcpdump will exit after receiving a single packet
+ # timeout will kill tcpdump if it is still running after 2s
+ timeout 2s ip netns exec "${CLIENT_NS}" \
+ tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" &
+ pid=$!
+ slowwait 1 grep -qs "listening" "${OUTPUT}"
+
+ # send broadcast ping
+ ip netns exec "${CLIENT_NS}" \
+ ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null
+
+ # wait for tcpdump for exit after receiving packet
+ wait "${pid}"
+
+ # compare ethernet destination field to ff:ff:ff:ff:ff:ff
+ ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \
+ awk '{sub(/,/,"",$3); print $3}')
+ if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then
+ echo "[ OK ]"
+ rc="${ksft_pass}"
+ else
+ echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \
+ "got ${ether_dst}"
+ rc="${ksft_fail}"
+ fi
+
+ return "${rc}"
+}
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+test_broadcast_ether_dst
+
+exit $?
diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config
new file mode 100644
index 000000000000..188f79796670
--- /dev/null
+++ b/tools/testing/selftests/net/can/config
@@ -0,0 +1,3 @@
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VCAN=m
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index b39f748c2572..2b0a90581e2f 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -467,8 +467,8 @@ ipv6_fdb_grp_fcnal()
log_test $? 0 "Get Fdb nexthop group by id"
# fdb nexthop group can only contain fdb nexthops
- run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
- run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1"
run_cmd "$IP nexthop add id 103 group 63/64 fdb"
log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
@@ -494,6 +494,26 @@ ipv6_fdb_grp_fcnal()
run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
log_test $? 2 "Fdb Nexthop with encap"
+ # Replace FDB nexthop to non-FDB and vice versa
+ run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+ log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb"
+ log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+ # Replace FDB nexthop address while in a group
+ run_cmd "$IP nexthop add id 71 group 70 fdb"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb"
+ log_test $? 0 "Replace FDB nexthop address while in a group"
+
+ # Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+ log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+ run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 73 group 72"
+ run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb"
+ log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
log_test $? 0 "Fdb mac add with nexthop group"
@@ -547,15 +567,15 @@ ipv4_fdb_grp_fcnal()
log_test $? 0 "Get Fdb nexthop group by id"
# fdb nexthop group can only contain fdb nexthops
- run_cmd "$IP nexthop add id 14 via 172.16.1.2"
- run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+ run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1"
run_cmd "$IP nexthop add id 103 group 14/15 fdb"
log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
# Non fdb nexthop group can not contain fdb nexthops
run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
- run_cmd "$IP nexthop add id 104 group 14/15"
+ run_cmd "$IP nexthop add id 104 group 16/17"
log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
# fdb nexthop cannot have blackhole
@@ -574,6 +594,26 @@ ipv4_fdb_grp_fcnal()
run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
log_test $? 2 "Fdb Nexthop with encap"
+ # Replace FDB nexthop to non-FDB and vice versa
+ run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+ log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb"
+ log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+ # Replace FDB nexthop address while in a group
+ run_cmd "$IP nexthop add id 19 group 18 fdb"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb"
+ log_test $? 0 "Replace FDB nexthop address while in a group"
+
+ # Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+ log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+ run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 21 group 20"
+ run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb"
+ log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
log_test $? 0 "Fdb mac add with nexthop group"
@@ -582,7 +622,7 @@ ipv4_fdb_grp_fcnal()
run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
log_test $? 255 "Fdb mac add with nexthop"
- run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+ run_cmd "$IP ro add 172.16.0.0/22 nhid 16"
log_test $? 2 "Route add with fdb nexthop"
run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index b98ea9449b8b..dfb6646cb97b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -18,6 +18,8 @@
# | 2001:db8:1::1/64 2001:db8:2::1/64 |
# | |
# +-----------------------------------------------------------------+
+#
+#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
ALL_TESTS="
ping_ipv4
@@ -27,6 +29,7 @@ ALL_TESTS="
ipv4_sip_equal_dip
ipv6_sip_equal_dip
ipv4_dip_link_local
+ ipv4_sip_link_local
"
NUM_NETIFS=4
@@ -330,6 +333,32 @@ ipv4_dip_link_local()
tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
}
+ipv4_sip_link_local()
+{
+ local sip=169.254.1.1
+
+ RET=0
+
+ # Disable rpfilter to prevent packets to be dropped because of it.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf."$rp1".rp_filter 0
+
+ tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \
+ flower src_ip "$sip" action pass
+
+ $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \
+ -A "$sip" -B 198.51.100.2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "IPv4 source IP is link-local"
+
+ tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower
+ sysctl_restore net.ipv4.conf."$rp1".rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index 1f6f53e284b5..6269d5e23487 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
ets_test_strict
ets_test_mixed
ets_test_dwrr
+ ets_test_plug
classifier_mode
ets_test_strict
ets_test_mixed
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index 08240d3e3c87..79d837a2868a 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -224,3 +224,11 @@ ets_test_dwrr()
ets_set_dwrr_two_bands
xfail_on_slow ets_dwrr_test_01
}
+
+ets_test_plug()
+{
+ ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514"
+ tc qdisc add dev $put handle 20: parent 10:4 plug
+ start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1"
+ ets_qdisc_setup $put 2
+}
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
index 521ba38f2ddd..df4eea5c192b 100644
--- a/tools/testing/selftests/net/lib/xdp_native.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -14,6 +14,8 @@
#define MAX_PAYLOAD_LEN 5000
#define MAX_HDR_LEN 64
+extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak;
+
enum {
XDP_MODE = 0,
XDP_PORT = 1,
@@ -68,30 +70,57 @@ static void record_stats(struct xdp_md *ctx, __u32 stat_type)
static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
struct udphdr *udph = NULL;
- struct ethhdr *eth = data;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
if (data + sizeof(*eth) > data_end)
return NULL;
if (eth->h_proto == bpf_htons(ETH_P_IP)) {
- struct iphdr *iph = data + sizeof(*eth);
+ struct iphdr *iph;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
if (iph + 1 > (struct iphdr *)data_end ||
iph->protocol != IPPROTO_UDP)
return NULL;
- udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
- } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
- struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ udph = data + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
ipv6h->nexthdr != IPPROTO_UDP)
return NULL;
- udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
} else {
return NULL;
}
@@ -145,17 +174,34 @@ static void swap_machdr(void *data)
static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
struct udphdr *udph = NULL;
- struct ethhdr *eth = data;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
if (data + sizeof(*eth) > data_end)
return XDP_PASS;
if (eth->h_proto == bpf_htons(ETH_P_IP)) {
- struct iphdr *iph = data + sizeof(*eth);
- __be32 tmp_ip = iph->saddr;
+ struct iphdr *iph;
+ __be32 tmp_ip;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
if (iph + 1 > (struct iphdr *)data_end ||
iph->protocol != IPPROTO_UDP)
@@ -169,8 +215,10 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_PASS;
record_stats(ctx, STATS_RX);
+ eth = data;
swap_machdr((void *)eth);
+ tmp_ip = iph->saddr;
iph->saddr = iph->daddr;
iph->daddr = tmp_ip;
@@ -178,9 +226,19 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_TX;
- } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
- struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
struct in6_addr tmp_ipv6;
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
ipv6h->nexthdr != IPPROTO_UDP)
@@ -194,6 +252,7 @@ static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
return XDP_PASS;
record_stats(ctx, STATS_RX);
+ eth = data;
swap_machdr((void *)eth);
__builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 7a3cb4c09e45..d847ff1737c3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -28,7 +28,7 @@ flush_pids()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index ac1349c4b9e5..b148cadb96d0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -183,9 +183,10 @@ static void xgetaddrinfo(const char *node, const char *service,
struct addrinfo *hints,
struct addrinfo **res)
{
-again:
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
const char *errstr;
@@ -1092,6 +1093,7 @@ int main_loop_s(int listensock)
struct pollfd polls;
socklen_t salen;
int remotesock;
+ int err = 0;
int fd = 0;
again:
@@ -1124,7 +1126,7 @@ again:
SOCK_TEST_TCPULP(remotesock, 0);
memset(&winfo, 0, sizeof(winfo));
- copyfd_io(fd, remotesock, 1, true, &winfo);
+ err = copyfd_io(fd, remotesock, 1, true, &winfo);
} else {
perror("accept");
return 1;
@@ -1133,10 +1135,10 @@ again:
if (cfg_input)
close(fd);
- if (--cfg_repeat > 0)
+ if (!err && --cfg_repeat > 0)
goto again;
- return 0;
+ return err;
}
static void init_rng(void)
@@ -1246,7 +1248,7 @@ void xdisconnect(int fd)
else
xerror("bad family");
- strcpy(cmd, "ss -M | grep -q ");
+ strcpy(cmd, "ss -Mnt | grep -q ");
cmdlen = strlen(cmd);
if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
sizeof(cmd) - cmdlen))
@@ -1256,7 +1258,7 @@ void xdisconnect(int fd)
/*
* wait until the pending data is completely flushed and all
- * the MPTCP sockets reached the closed status.
+ * the sockets reached the closed status.
* disconnect will bypass/ignore/drop any pending data.
*/
for (i = 0; ; i += msec_sleep) {
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 5e3c56253274..47ecb5b3836e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -134,7 +134,7 @@ ns4=""
TEST_GROUP=""
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "$cin_disconnect"
@@ -211,6 +211,11 @@ if $checksum; then
done
fi
+if $capture; then
+ rndh="${ns1:4}"
+ mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-"
+fi
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -361,7 +366,6 @@ do_transfer()
if $capture; then
local capuser
- local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 3cf1e2a612ce..f3bcaa48df8f 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service,
struct addrinfo *hints,
struct addrinfo **res)
{
-again:
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
const char *errstr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index b8af65373b3a..7fd555b123b9 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,7 +8,7 @@
# ShellCheck incorrectly believes that most of the code here is unreachable
# because it's invoked by variable name, see how the "tests" array is used
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
. "$(dirname "${0}")/mptcp_lib.sh"
@@ -3842,6 +3842,7 @@ endpoint_tests()
# remove and re-add
if reset_with_events "delete re-add signal" &&
mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0
pm_nl_set_limits $ns1 0 3
pm_nl_set_limits $ns2 3 3
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 09cd24b2ae46..d62e653d48b0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -384,7 +384,7 @@ mptcp_lib_make_file() {
mptcp_lib_print_file_err() {
ls -l "${1}" 1>&2
echo "Trailing bytes are: "
- tail -c 27 "${1}"
+ tail -c 32 "${1}" | od -x | head -n2
}
# $1: input file ; $2: output file ; $3: what kind of file
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 9934a68df237..112c07c4c37a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service,
struct addrinfo *hints,
struct addrinfo **res)
{
-again:
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
const char *errstr;
@@ -666,22 +667,26 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
- xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+ xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64,
+ s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1));
/* be nice when running on top of older kernel */
if (s.pkt_stats_avail) {
if (s.last_sample.mptcpi_bytes_sent != ret2)
- xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_sent, ret2,
s.last_sample.mptcpi_bytes_sent - ret2);
if (s.last_sample.mptcpi_bytes_received != ret)
- xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_received, ret,
s.last_sample.mptcpi_bytes_received - ret);
if (s.last_sample.mptcpi_bytes_acked != ret)
- xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
- s.last_sample.mptcpi_bytes_acked, ret2,
- s.last_sample.mptcpi_bytes_acked - ret2);
+ xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
+ s.last_sample.mptcpi_bytes_acked, ret,
+ s.last_sample.mptcpi_bytes_acked - ret);
}
close(fd);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 418a903c3a4d..f01989be6e9b 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -95,7 +95,7 @@ init()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 2e6648a2b2c0..ec6a87588191 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -32,7 +32,7 @@ ns1=""
err=$(mktemp)
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "${err}"
@@ -70,8 +70,9 @@ format_endpoints() {
mptcp_lib_pm_nl_format_endpoints "${@}"
}
+# This function is invoked indirectly
+#shellcheck disable=SC2317,SC2329
get_endpoint() {
- # shellcheck disable=SC2317 # invoked indirectly
mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
}
@@ -198,6 +199,7 @@ set_limits 1 9 2>/dev/null
check "get_limits" "${default_limits}" "subflows above hard limit"
set_limits 8 8
+flush_endpoint ## to make sure it doesn't affect the limits
check "get_limits" "$(format_limits 8 8)" "set limits"
flush_endpoint
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 994a556f46c1..93fea3442216 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -188,6 +188,13 @@ static int capture_events(int fd, int event_group)
fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_FLAGS) {
+ __u16 flags = *(__u16 *)RTA_DATA(attrs);
+
+ /* only print when present, easier */
+ if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0)
+ fprintf(stderr, ",deny_join_id0:1");
+ }
attrs = RTA_NEXT(attrs, msg_len);
}
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 2329c2f8519b..1903e8e84a31 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -35,7 +35,7 @@ usage() {
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "$cout" "$sout"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 333064b0b5ac..3d45991f24ed 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -94,7 +94,7 @@ test_fail()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
print_title "Cleanup"
@@ -201,6 +201,9 @@ make_connection()
is_v6="v4"
fi
+ # set this on the client side only: will not affect the rest
+ ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0
+
:>"$client_evts"
:>"$server_evts"
@@ -223,23 +226,28 @@ make_connection()
local client_token
local client_port
local client_serverside
+ local client_nojoin
local server_token
local server_serverside
+ local server_nojoin
client_token=$(mptcp_lib_evts_get_info token "$client_evts")
client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+ client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts")
server_token=$(mptcp_lib_evts_get_info token "$server_evts")
server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+ server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts")
print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
- if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
- [ "$server_serverside" = 1 ]
+ if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] &&
+ [ "${client_serverside}" = 0 ] && [ "${server_serverside}" = 1 ] &&
+ [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ]
then
test_pass
print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
else
- test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+ test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})"
mptcp_lib_result_print_all_tap
exit ${KSFT_FAIL}
fi
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
index 606a43a60f73..7fc6c5dbd551 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -99,7 +99,7 @@ run_one_clash_test()
local entries
local cre
- if ! ip netns exec "$ns" ./udpclash $daddr $dport;then
+ if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then
echo "INFO: did not receive expected number of replies for $daddr:$dport"
ip netns exec "$ctns" conntrack -S
# don't fail: check if clash resolution triggered after all.
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
index 788cd56ea4a0..615fe3c6f405 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -187,7 +187,7 @@ ct_udpclash()
[ -x udpclash ] || return
while [ $now -lt $end ]; do
- ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1
+ ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1
now=$(date +%s)
done
@@ -277,6 +277,7 @@ check_taint()
insert_flood()
{
local n="$1"
+ local timeout="$2"
local r=0
r=$((RANDOM%$insert_count))
@@ -302,7 +303,7 @@ test_floodresize_all()
read tainted_then < /proc/sys/kernel/tainted
for n in "$nsclient1" "$nsclient2";do
- insert_flood "$n" &
+ insert_flood "$n" "$timeout" &
done
# resize table constantly while flood/insert/dump/flushs
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a4ee5496f2a1..45832df98295 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -20,6 +20,7 @@ ret=0
SOCAT_TIMEOUT=60
nsin=""
+nsin_small=""
ns1out=""
ns2out=""
@@ -36,7 +37,7 @@ cleanup() {
cleanup_all_ns
- rm -f "$nsin" "$ns1out" "$ns2out"
+ rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"
[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}
@@ -72,6 +73,7 @@ lmtu=1500
rmtu=2000
filesize=$((2 * 1024 * 1024))
+filesize_small=$((filesize / 16))
usage(){
echo "nft_flowtable.sh [OPTIONS]"
@@ -89,7 +91,10 @@ do
o) omtu=$OPTARG;;
l) lmtu=$OPTARG;;
r) rmtu=$OPTARG;;
- s) filesize=$OPTARG;;
+ s)
+ filesize=$OPTARG
+ filesize_small=$((OPTARG / 16))
+ ;;
*) usage;;
esac
done
@@ -215,6 +220,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
fi
nsin=$(mktemp)
+nsin_small=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)
@@ -265,6 +271,7 @@ check_counters()
check_dscp()
{
local what=$1
+ local pmtud="$2"
local ok=1
local counter
@@ -277,37 +284,39 @@ check_dscp()
local pc4z=${counter%*bytes*}
local pc4z=${pc4z#*packets}
+ local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected"
+
case "$what" in
"dscp_none")
if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_fwd")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_ingress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_egress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
*)
- echo "FAIL: Unknown DSCP check" 1>&2
+ echo "$failmsg: Unknown DSCP check" 1>&2
ret=1
ok=0
esac
@@ -319,9 +328,9 @@ check_dscp()
check_transfer()
{
- in=$1
- out=$2
- what=$3
+ local in=$1
+ local out=$2
+ local what=$3
if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
@@ -342,25 +351,39 @@ test_tcp_forwarding_ip()
{
local nsa=$1
local nsb=$2
- local dstip=$3
- local dstport=$4
+ local pmtu=$3
+ local dstip=$4
+ local dstport=$5
local lret=0
+ local socatc
+ local socatl
+ local infile="$nsin"
+
+ if [ $pmtu -eq 0 ]; then
+ infile="$nsin_small"
+ fi
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
lpid=$!
busywait 1000 listener_ready
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+ socatc=$?
wait $lpid
+ socatl=$?
- if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+ if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then
+ rc=1
+ fi
+
+ if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
lret=1
ret=1
fi
- if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+ if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
lret=1
ret=1
fi
@@ -370,14 +393,16 @@ test_tcp_forwarding_ip()
test_tcp_forwarding()
{
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ local pmtu="$3"
+
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
return $?
}
test_tcp_forwarding_set_dscp()
{
- check_dscp "dscp_none"
+ local pmtu="$3"
ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
@@ -388,8 +413,8 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_ingress"
+ test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345
+ check_dscp "dscp_ingress" "$pmtu"
ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
@@ -405,10 +430,10 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_egress"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ check_dscp "dscp_egress" "$pmtu"
- ip netns exec "$nsr1" nft flush table netdev dscpmangle
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
echo "SKIP: Could not load netdev:egress for veth1"
fi
@@ -416,48 +441,53 @@ fi
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_fwd"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
+ check_dscp "dscp_fwd" "$pmtu"
}
test_tcp_forwarding_nat()
{
+ local nsa="$1"
+ local nsb="$2"
+ local pmtu="$3"
+ local what="$4"
local lret
- local pmtu
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- lret=$?
+ [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
- pmtu=$3
- what=$4
+ test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
+ lret=$?
if [ "$lret" -eq 0 ] ; then
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with masquerade $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
fi
- test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666
lret=$?
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with dnat $what"
elif [ "$lret" -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
+ else
+ echo "FAIL: flow offload for ns1/ns2 with dnat $what"
fi
return $lret
}
make_file "$nsin" "$filesize"
+make_file "$nsin_small" "$filesize_small"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 0; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -489,8 +519,9 @@ table ip nat {
}
EOF
+check_dscp "dscp_none" "0"
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
- echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
exit 0
fi
@@ -513,6 +544,14 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
+ exit 0
+fi
+
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
@@ -644,7 +683,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 1; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -668,7 +707,7 @@ if [ "$1" = "" ]; then
fi
echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
- $0 -o "$o" -l "$l" -r "$r" -s "$filesize"
+ $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
fi
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c
index 85c7b906ad08..79de163d61ab 100644
--- a/tools/testing/selftests/net/netfilter/udpclash.c
+++ b/tools/testing/selftests/net/netfilter/udpclash.c
@@ -29,7 +29,7 @@ struct thread_args {
int sockfd;
};
-static int wait = 1;
+static volatile int wait = 1;
static void *thread_main(void *varg)
{
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 3c8d3455d8e7..b327d3061ed5 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -25,6 +25,7 @@ tests="
nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
+ tunnel_metadata ovs: test extraction of tunnel metadata
drop_reason drop: test drop reasons are emitted
psample psample: Sampling packets with psample"
@@ -113,13 +114,13 @@ ovs_add_dp () {
}
ovs_add_if () {
- info "Adding IF to DP: br:$2 if:$3"
- if [ "$4" != "-u" ]; then
- ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
- || return 1
+ info "Adding IF to DP: br:$3 if:$4 ($2)"
+ if [ "$5" != "-u" ]; then
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \
+ -t "$2" "$3" "$4" || return 1
else
python3 $ovs_base/ovs-dpctl.py add-if \
- -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+ -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err &
pid=$!
on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
fi
@@ -166,9 +167,9 @@ ovs_add_netns_and_veths () {
fi
if [ "$7" != "-u" ]; then
- ovs_add_if "$1" "$2" "$4" || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" || return 1
else
- ovs_add_if "$1" "$2" "$4" -u || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1
fi
if [ $TRACING -eq 1 ]; then
@@ -756,6 +757,79 @@ test_upcall_interfaces() {
return 0
}
+ovs_add_kernel_tunnel() {
+ local sbxname=$1; shift
+ local ns=$1; shift
+ local tnl_type=$1; shift
+ local name=$1; shift
+ local addr=$1; shift
+
+ info "setting up kernel ${tnl_type} tunnel ${name}"
+ ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1
+ on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1"
+ ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1
+ ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1
+}
+
+test_tunnel_metadata() {
+ which arping >/dev/null 2>&1 || return $ksft_skip
+
+ sbxname="test_tunnel_metadata"
+ sbx_add "${sbxname}" || return 1
+
+ info "setting up new DP"
+ ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1
+
+ ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \
+ 172.31.110.1/24 || return 1
+
+ info "removing veth interface from openvswitch and setting IP"
+ ovs_del_if "${sbxname}" tdp0 left0 || return 1
+ ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1
+ ovs_sbx "${sbxname}" ip link set left0 up || return 1
+
+ info "setting up tunnel port in openvswitch"
+ ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1
+ on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0"
+ ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1
+ ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1
+
+ configs=$(echo '
+ 1 172.31.221.1/24 1155332 32 set udpcsum flags\(df\|csum\)
+ 2 172.31.222.1/24 1234567 45 set noudpcsum flags\(df\)
+ 3 172.31.223.1/24 1020304 23 unset udpcsum flags\(csum\)
+ 4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d')
+
+ while read -r i addr id ttl df csum flags; do
+ ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \
+ remote 172.31.110.2 id ${id} dstport 4789 \
+ ttl ${ttl} df ${df} ${csum} || return 1
+ done <<< "${configs}"
+
+ ovs_wait grep -q 'listening on upcall packet handler' \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+
+ info "sending arping"
+ for i in 1 2 3 4; do
+ ovs_sbx "${sbxname}" ip netns exec tns \
+ arping -I vxlan${i} 172.31.22${i}.2 -c 1 \
+ >${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr
+ done
+
+ info "checking that received decapsulated packets carry correct metadata"
+ while read -r i addr id ttl df csum flags; do
+ arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha="
+ addrs="src=172.31.110.1,dst=172.31.110.2"
+ ports="tp_src=[0-9]*,tp_dst=4789"
+ tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)"
+
+ ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+ done <<< "${configs}"
+
+ return 0
+}
+
run_test() {
(
tname="$1"
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt
new file mode 100644
index 000000000000..26794e7ddfd5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-disconnect.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x602 /proc/sys/net/ipv4/tcp_timestamps=0`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK>
+ +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK>
+
+// sk->sk_state is TCP_SYN_RECV
+ +.1 accept(3, ..., ...) = 4
+
+// tcp_disconnect() sets sk->sk_state to TCP_CLOSE
+ +0 connect(4, AF_UNSPEC, ...) = 0
+ +0 > R. 1:1(0) ack 11 win 65535
+
+// connect() sets sk->sk_state to TCP_SYN_SENT
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
+ +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// tp->fastopen_rsk must be NULL
+ +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh
new file mode 100755
index 000000000000..20f3369f776b
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nh.sh
@@ -0,0 +1,223 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ basic_tx_ipv4
+ basic_tx_ipv6
+ learning
+ proxy_ipv4
+ proxy_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Cleanup
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+nh_stats_get()
+{
+ ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
+}
+
+tc_stats_get()
+{
+ tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
+}
+
+basic_tx_common()
+{
+ local af_str=$1; shift
+ local proto=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+
+ RET=0
+
+ # Test basic Tx functionality. Check that stats are incremented on
+ # both the FDB nexthop group and the egress device.
+
+ run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
+ run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
+ run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
+ run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
+ check_err $? "FDB nexthop group stats did not increase"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
+ check_err $? "tc filter stats did not increase"
+
+ log_test "VXLAN FDB nexthop: $af_str basic Tx"
+}
+
+basic_tx_ipv4()
+{
+ basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
+}
+
+basic_tx_ipv6()
+{
+ basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
+}
+
+learning()
+{
+ RET=0
+
+ # When learning is enabled on the VXLAN device, an incoming packet
+ # might try to refresh an FDB entry that points to an FDB nexthop group
+ # instead of an ordinary remote destination. Check that the kernel does
+ # not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
+ run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
+ run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
+ run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
+
+ log_test "VXLAN FDB nexthop: learning"
+}
+
+proxy_common()
+{
+ local af_str=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+ local neigh_addr=$1; shift
+ local ping_cmd=$1; shift
+
+ RET=0
+
+ # When the "proxy" option is enabled on the VXLAN device, the device
+ # will suppress ARP requests and IPv6 Neighbor Solicitation messages if
+ # it is able to reply on behalf of the remote host. That is, if a
+ # matching and valid neighbor entry is configured on the VXLAN device
+ # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
+ # FDB entry for the neighbor's MAC address might point to an FDB
+ # nexthop group instead of an ordinary remote destination. Check that
+ # the kernel does not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
+
+ run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 $ping_cmd"
+
+ log_test "VXLAN FDB nexthop: $af_str proxy"
+}
+
+proxy_ipv4()
+{
+ proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
+ "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
+}
+
+proxy_ipv6()
+{
+ proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
+ "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command mausezahn
+require_command arping
+require_command ndisc6
+require_command jq
+
+if ! ip nexthop help 2>&1 | grep -q "stats"; then
+ echo "SKIP: iproute2 ip too old, missing nexthop stats support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup_ns ns1; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 5ded3b3a7538..dd093f9df6f1 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -181,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type,
return sendmsg(fd, &msg, flags);
}
-static int tls_recv_cmsg(struct __test_metadata *_metadata,
- int fd, unsigned char record_type,
- void *data, size_t len, int flags)
+static int __tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char *ctype,
+ void *data, size_t len, int flags)
{
char cbuf[CMSG_SPACE(sizeof(char))];
struct cmsghdr *cmsg;
- unsigned char ctype;
struct msghdr msg;
struct iovec vec;
int n;
@@ -206,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata,
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
- ctype = *((unsigned char *)CMSG_DATA(cmsg));
+ if (ctype)
+ *ctype = *((unsigned char *)CMSG_DATA(cmsg));
+
+ return n;
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char record_type,
+ void *data, size_t len, int flags)
+{
+ unsigned char ctype;
+ int n;
+
+ n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags);
EXPECT_EQ(ctype, record_type);
return n;
@@ -2164,6 +2176,284 @@ TEST_F(tls, rekey_poll_delay)
}
}
+struct raw_rec {
+ unsigned int plain_len;
+ unsigned char plain_data[100];
+ unsigned int cipher_len;
+ unsigned char cipher_data[128];
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */
+static const struct raw_rec id0_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33,
+ 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf,
+ 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd,
+ 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */
+static const struct raw_rec id0_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b,
+ 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae,
+ 0x88, 0xe1, 0xd2, 0x08, 0x4f,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */
+static const struct raw_rec id0_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90,
+ 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03,
+ 0x68, 0x80, 0xd3, 0xd8, 0xcc,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */
+static const struct raw_rec id1_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c,
+ 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3,
+ 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff,
+ 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */
+static const struct raw_rec id1_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe,
+ 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6,
+ 0xb4, 0x7e, 0xef, 0x40, 0x2b,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */
+static const struct raw_rec id1_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86,
+ 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc,
+ 0xc9, 0xbf, 0xfe, 0x5b, 0xb1,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_ctrl_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+ 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36,
+ 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+ 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b,
+ 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */
+static const struct raw_rec id2_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e,
+ 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9,
+ 0x06, 0xdb, 0x79, 0xe5, 0x5d,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */
+static const struct raw_rec id2_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26,
+ 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83,
+ 0x30, 0x48, 0x69, 0x1a, 0x47,
+ },
+};
+
+FIXTURE(zero_len)
+{
+ int fd, cfd;
+ bool notls;
+};
+
+FIXTURE_VARIANT(zero_len)
+{
+ const struct raw_rec *recs[4];
+ ssize_t recv_ret[4];
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_data_data)
+{
+ .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, },
+ .recv_ret = { 33, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data)
+{
+ .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, },
+ .recv_ret = { 11, 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, },
+ .recv_ret = { -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, },
+ .recv_ret = { 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, },
+ .recv_ret = { 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl)
+{
+ .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, },
+ .recv_ret = { 0, 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, },
+ .recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0data_0data)
+{
+ .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, },
+ .recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_SETUP(zero_len)
+{
+ struct tls_crypto_info_keys tls12;
+ int ret;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+ if (self->notls)
+ return;
+
+ /* Don't install keys on fd, we'll send raw records */
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(zero_len)
+{
+ close(self->fd);
+ close(self->cfd);
+}
+
+TEST_F(zero_len, test)
+{
+ const struct raw_rec *const *rec;
+ unsigned char buf[128];
+ int rec_off;
+ int i;
+
+ for (i = 0; i < 4 && variant->recs[i]; i++)
+ EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data,
+ variant->recs[i]->cipher_len, 0),
+ variant->recs[i]->cipher_len);
+
+ rec = &variant->recs[0];
+ rec_off = 0;
+ for (i = 0; i < 4; i++) {
+ int j, ret;
+
+ ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1;
+ EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL,
+ buf, sizeof(buf), MSG_DONTWAIT), ret);
+ if (ret == -1)
+ EXPECT_EQ(errno, -variant->recv_ret[i]);
+ if (variant->recv_ret[i] == -EAGAIN)
+ break;
+
+ for (j = 0; j < ret; j++) {
+ while (rec_off == (*rec)->plain_len) {
+ rec++;
+ rec_off = 0;
+ }
+ EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]);
+ rec_off++;
+ }
+ }
+};
+
FIXTURE(tls_err)
{
int fd, cfd;
@@ -2480,6 +2770,22 @@ TEST_F(tls_err, poll_partial_rec_async)
}
}
+/* Use OOB+large send to trigger copy mode due to memory pressure.
+ * OOB causes a short read.
+ */
+TEST_F(tls_err, oob_pressure)
+{
+ char buf[1<<16];
+ int i;
+
+ memrnd(buf, sizeof(buf));
+
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+ EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+ for (i = 0; i < 64; i++)
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
@@ -2708,6 +3014,67 @@ TEST(prequeue) {
close(cfd);
}
+TEST(data_steal) {
+ struct tls_crypto_info_keys tls;
+ char buf[20000], buf2[20000];
+ struct sockaddr_in addr;
+ int sfd, cfd, ret, fd;
+ int pid, status;
+ socklen_t len;
+
+ len = sizeof(addr);
+ memrnd(buf, sizeof(buf));
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0);
+
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+ ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0);
+ ASSERT_EQ(listen(sfd, 10), 0);
+ ASSERT_EQ(getsockname(sfd, &addr, &len), 0);
+ ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0);
+ ASSERT_GE(cfd = accept(sfd, &addr, &len), 0);
+ close(sfd);
+
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (ret) {
+ ASSERT_EQ(errno, ENOENT);
+ SKIP(return, "no TLS support");
+ }
+ ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
+
+ /* Spawn a child and get it into the read wait path of the underlying
+ * TCP socket.
+ */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (!pid) {
+ EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL),
+ sizeof(buf) / 2);
+ exit(!__test_passed(_metadata));
+ }
+
+ usleep(10000);
+ ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
+ ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
+
+ EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf));
+ EXPECT_EQ(wait(&status), pid);
+ EXPECT_EQ(status, 0);
+ EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1);
+ /* Don't check errno, the error will be different depending
+ * on what random bytes TLS interpreted as the record length.
+ */
+
+ close(fd);
+ close(cfd);
+}
+
static void __attribute__((constructor)) fips_check(void) {
int res;
FILE *f;
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
index 6133524710f7..cf7cc0ce0248 100644
--- a/tools/testing/selftests/pidfd/config
+++ b/tools/testing/selftests/pidfd/config
@@ -4,6 +4,5 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_TIME_NS=y
-CONFIG_GENERIC_VDSO_TIME_NS=y
CONFIG_CGROUPS=y
CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index 4efa6314bd96..864f0c9f1afc 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -67,7 +67,7 @@ static inline int paste_last(void *i)
#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1)
/* This defines the prefixed load/store instructions */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define stringify_in_c(...) __VA_ARGS__
#else
# define __stringify_in_c(...) #__VA_ARGS__
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 19bb333e2485..6b78a8382d40 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -18,6 +18,7 @@
/proc-tid0
/proc-uptime-001
/proc-uptime-002
+/proc-pidns
/read
/self
/setns-dcache
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index 50aba102201a..be3013515aae 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -28,5 +28,6 @@ TEST_GEN_PROGS += setns-sysvipc
TEST_GEN_PROGS += thread-self
TEST_GEN_PROGS += proc-multiple-procfs
TEST_GEN_PROGS += proc-fsconfig-hidepid
+TEST_GEN_PROGS += proc-pidns
include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc-maps-race.c b/tools/testing/selftests/proc/proc-maps-race.c
index 66773685a047..94bba4553130 100644
--- a/tools/testing/selftests/proc/proc-maps-race.c
+++ b/tools/testing/selftests/proc/proc-maps-race.c
@@ -202,11 +202,11 @@ static void print_first_lines(char *text, int nr)
int offs = end - text;
text[offs] = '\0';
- printf(text);
+ printf("%s", text);
text[offs] = '\n';
printf("\n");
} else {
- printf(text);
+ printf("%s", text);
}
}
@@ -221,7 +221,7 @@ static void print_last_lines(char *text, int nr)
nr--;
start--;
}
- printf(start);
+ printf("%s", start);
}
static void print_boundaries(const char *title, FIXTURE_DATA(proc_maps_race) *self)
diff --git a/tools/testing/selftests/proc/proc-pidns.c b/tools/testing/selftests/proc/proc-pidns.c
new file mode 100644
index 000000000000..52500597f951
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-pidns.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2025 SUSE LLC.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/prctl.h>
+
+#include "../kselftest_harness.h"
+
+#define ASSERT_ERRNO(expected, _t, seen) \
+ __EXPECT(expected, #expected, \
+ ({__typeof__(seen) _tmp_seen = (seen); \
+ _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1)
+
+#define ASSERT_ERRNO_EQ(expected, seen) \
+ ASSERT_ERRNO(expected, ==, seen)
+
+#define ASSERT_SUCCESS(seen) \
+ ASSERT_ERRNO(0, <=, seen)
+
+static int touch(char *path)
+{
+ int fd = open(path, O_WRONLY|O_CREAT|O_CLOEXEC, 0644);
+ if (fd < 0)
+ return -1;
+ return close(fd);
+}
+
+FIXTURE(ns)
+{
+ int host_mntns, host_pidns;
+ int dummy_pidns;
+};
+
+FIXTURE_SETUP(ns)
+{
+ /* Stash the old mntns. */
+ self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->host_mntns);
+
+ /* Create a new mount namespace and make it private. */
+ ASSERT_SUCCESS(unshare(CLONE_NEWNS));
+ ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL));
+
+ /*
+ * Create a proper tmpfs that we can use and will disappear once we
+ * leave this mntns.
+ */
+ ASSERT_SUCCESS(mount("tmpfs", "/tmp", "tmpfs", 0, NULL));
+
+ /*
+ * Create a pidns we can use for later tests. We need to fork off a
+ * child so that we get a usable nsfd that we can bind-mount and open.
+ */
+ ASSERT_SUCCESS(mkdir("/tmp/dummy", 0755));
+ ASSERT_SUCCESS(touch("/tmp/dummy/pidns"));
+ ASSERT_SUCCESS(mkdir("/tmp/dummy/proc", 0755));
+
+ self->host_pidns = open("/proc/self/ns/pid", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->host_pidns);
+ ASSERT_SUCCESS(unshare(CLONE_NEWPID));
+
+ pid_t pid = fork();
+ ASSERT_SUCCESS(pid);
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ ASSERT_SUCCESS(mount("/proc/self/ns/pid", "/tmp/dummy/pidns", NULL, MS_BIND, NULL));
+ ASSERT_SUCCESS(mount("proc", "/tmp/dummy/proc", "proc", 0, NULL));
+ exit(0);
+ }
+
+ int wstatus;
+ ASSERT_EQ(waitpid(pid, &wstatus, 0), pid);
+ ASSERT_TRUE(WIFEXITED(wstatus));
+ ASSERT_EQ(WEXITSTATUS(wstatus), 0);
+
+ ASSERT_SUCCESS(setns(self->host_pidns, CLONE_NEWPID));
+
+ self->dummy_pidns = open("/tmp/dummy/pidns", O_RDONLY|O_CLOEXEC);
+ ASSERT_SUCCESS(self->dummy_pidns);
+}
+
+FIXTURE_TEARDOWN(ns)
+{
+ ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS));
+ ASSERT_SUCCESS(close(self->host_mntns));
+
+ ASSERT_SUCCESS(close(self->host_pidns));
+ ASSERT_SUCCESS(close(self->dummy_pidns));
+}
+
+TEST_F(ns, pidns_mount_string_path)
+{
+ ASSERT_SUCCESS(mkdir("/tmp/proc-host", 0755));
+ ASSERT_SUCCESS(mount("proc", "/tmp/proc-host", "proc", 0, "pidns=/proc/self/ns/pid"));
+ ASSERT_SUCCESS(access("/tmp/proc-host/self/", X_OK));
+
+ ASSERT_SUCCESS(mkdir("/tmp/proc-dummy", 0755));
+ ASSERT_SUCCESS(mount("proc", "/tmp/proc-dummy", "proc", 0, "pidns=/tmp/dummy/pidns"));
+ ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/1/", X_OK));
+ ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/self/", X_OK));
+}
+
+TEST_F(ns, pidns_fsconfig_string_path)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy/pidns", 0));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+TEST_F(ns, pidns_fsconfig_fd)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+TEST_F(ns, pidns_reconfigure_remount)
+{
+ ASSERT_SUCCESS(mkdir("/tmp/proc", 0755));
+ ASSERT_SUCCESS(mount("proc", "/tmp/proc", "proc", 0, ""));
+
+ ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK));
+ ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK));
+
+ ASSERT_ERRNO_EQ(-EBUSY, mount(NULL, "/tmp/proc", NULL, MS_REMOUNT, "pidns=/tmp/dummy/pidns"));
+
+ ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK));
+ ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK));
+}
+
+TEST_F(ns, pidns_reconfigure_fsconfig_string_path)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy/pidns", 0));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+TEST_F(ns, pidns_reconfigure_fsconfig_fd)
+{
+ int fsfd = fsopen("proc", FSOPEN_CLOEXEC);
+ ASSERT_SUCCESS(fsfd);
+
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0));
+
+ int mountfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
+ ASSERT_SUCCESS(mountfd);
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy_pidns));
+ ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); /* noop */
+
+ ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0));
+ ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0));
+
+ ASSERT_SUCCESS(close(fsfd));
+ ASSERT_SUCCESS(close(mountfd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/README b/tools/testing/selftests/riscv/README
new file mode 100644
index 000000000000..443da395da68
--- /dev/null
+++ b/tools/testing/selftests/riscv/README
@@ -0,0 +1,24 @@
+KSelfTest RISC-V
+================
+
+- These tests are riscv specific and so not built or run but just skipped
+ completely when env-variable ARCH is found to be different than 'riscv'.
+
+- Holding true the above, RISC-V KSFT tests can be run within the
+ KSelfTest framework using standard Linux top-level-makefile targets:
+
+ $ make TARGETS=riscv kselftest-clean
+ $ make TARGETS=riscv kselftest
+
+ or
+
+ $ make -C tools/testing/selftests TARGETS=riscv \
+ INSTALL_PATH=<your-installation-path> install
+
+ or, alternatively, only specific riscv/ subtargets can be picked:
+
+ $ make -C tools/testing/selftests TARGETS=riscv RISCV_SUBTARGETS="mm vector" \
+ INSTALL_PATH=<your-installation-path> install
+
+ Further details on building and running KSFT can be found in:
+ Documentation/dev-tools/kselftest.rst
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 663a9cef1952..dcac5cbe7933 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -40,9 +40,9 @@
* Define weak versions to play nice with binaries that are statically linked
* against a libc that doesn't support registering its own rseq.
*/
-__weak ptrdiff_t __rseq_offset;
-__weak unsigned int __rseq_size;
-__weak unsigned int __rseq_flags;
+extern __weak ptrdiff_t __rseq_offset;
+extern __weak unsigned int __rseq_size;
+extern __weak unsigned int __rseq_flags;
static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
static const unsigned int *libc_rseq_size_p = &__rseq_size;
@@ -209,7 +209,7 @@ void rseq_init(void)
* libc not having registered a restartable sequence. Try to find the
* symbols if that's the case.
*/
- if (!*libc_rseq_size_p) {
+ if (!libc_rseq_size_p || !*libc_rseq_size_p) {
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c
index 1c9ceb661c43..0cfbb111a2d0 100644
--- a/tools/testing/selftests/sched_ext/hotplug.c
+++ b/tools/testing/selftests/sched_ext/hotplug.c
@@ -6,7 +6,6 @@
#include <bpf/bpf.h>
#include <sched.h>
#include <scx/common.h>
-#include <sched.h>
#include <sys/wait.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 61acbd45ffaa..874f17763536 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -24,6 +24,7 @@
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
+#include <sys/time.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
@@ -73,6 +74,14 @@
#define noinline __attribute__((noinline))
#endif
+#ifndef __nocf_check
+#define __nocf_check __attribute__((nocf_check))
+#endif
+
+#ifndef __naked
+#define __naked __attribute__((__naked__))
+#endif
+
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#define PR_GET_NO_NEW_PRIVS 39
@@ -3547,6 +3556,10 @@ static void signal_handler(int signal)
perror("write from signal");
}
+static void signal_handler_nop(int signal)
+{
+}
+
TEST(user_notification_signal)
{
pid_t pid;
@@ -4819,6 +4832,132 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}
+/* Ensure signals after the reply do not interrupt */
+TEST(user_notification_wait_killable_after_reply)
+{
+ int i, max_iter = 100000;
+ int listener, status;
+ int pipe_fds[2];
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(
+ __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ /*
+ * Used to count invocations. One token is transferred from the child
+ * to the parent per syscall invocation, the parent tries to take
+ * one token per successful RECV. If the syscall is restarted after
+ * RECV the parent will try to get two tokens while the child only
+ * provided one.
+ */
+ ASSERT_EQ(pipe(pipe_fds), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct sigaction new_action = {
+ .sa_handler = signal_handler_nop,
+ .sa_flags = SA_RESTART,
+ };
+ struct itimerval timer = {
+ .it_value = { .tv_usec = 1000 },
+ .it_interval = { .tv_usec = 1000 },
+ };
+ char c = 'a';
+
+ close(pipe_fds[0]);
+
+ /* Setup the sigaction with SA_RESTART */
+ if (sigaction(SIGALRM, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ /*
+ * Kill with SIGALRM repeatedly, to try to hit the race when
+ * handling the syscall.
+ */
+ if (setitimer(ITIMER_REAL, &timer, NULL) < 0)
+ perror("setitimer");
+
+ for (i = 0; i < max_iter; ++i) {
+ int fd;
+
+ /* Send one token per iteration to catch repeats. */
+ if (write(pipe_fds[1], &c, sizeof(c)) != 1) {
+ perror("write");
+ exit(1);
+ }
+
+ fd = syscall(__NR_dup, 0);
+ if (fd < 0) {
+ perror("dup");
+ exit(1);
+ }
+ close(fd);
+ }
+
+ exit(0);
+ }
+
+ close(pipe_fds[1]);
+
+ for (i = 0; i < max_iter; ++i) {
+ struct seccomp_notif req = {};
+ struct seccomp_notif_addfd addfd = {};
+ struct pollfd pfd = {
+ .fd = pipe_fds[0],
+ .events = POLLIN,
+ };
+ char c;
+
+ /*
+ * Try to receive one token. If it failed, one child syscall
+ * was restarted after RECV and needed to be handled twice.
+ */
+ ASSERT_EQ(poll(&pfd, 1, 1000), 1)
+ kill(pid, SIGKILL);
+
+ ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1)
+ kill(pid, SIGKILL);
+
+ /*
+ * Get the notification, reply to it as fast as possible to test
+ * whether the child wrongly skips going into the non-preemptible
+ * (TASK_KILLABLE) state.
+ */
+ do
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
+ while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */
+ ASSERT_EQ(ret, 0)
+ kill(pid, SIGKILL);
+
+ addfd.id = req.id;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ addfd.srcfd = 0;
+ ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0)
+ kill(pid, SIGKILL);
+ }
+
+ /*
+ * Wait for the process to exit, and make sure the process terminated
+ * with a zero exit code..
+ */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
struct tsync_vs_thread_leader_args {
pthread_t leader;
};
@@ -4896,7 +5035,36 @@ TEST(tsync_vs_dead_thread_leader)
EXPECT_EQ(0, status);
}
-noinline int probed(void)
+#ifdef __x86_64__
+
+/*
+ * We need naked probed_uprobe function. Using __nocf_check
+ * check to skip possible endbr64 instruction and ignoring
+ * -Wattributes, otherwise the compilation might fail.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wattributes"
+
+__naked __nocf_check noinline int probed_uprobe(void)
+{
+ /*
+ * Optimized uprobe is possible only on top of nop5 instruction.
+ */
+ asm volatile (" \n"
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
+ "ret \n"
+ );
+}
+#pragma GCC diagnostic pop
+
+#else
+noinline int probed_uprobe(void)
+{
+ return 1;
+}
+#endif
+
+noinline int probed_uretprobe(void)
{
return 1;
}
@@ -4949,35 +5117,46 @@ static ssize_t get_uprobe_offset(const void *addr)
return found ? (uintptr_t)addr - start + base : -1;
}
-FIXTURE(URETPROBE) {
+FIXTURE(UPROBE) {
int fd;
};
-FIXTURE_VARIANT(URETPROBE) {
+FIXTURE_VARIANT(UPROBE) {
/*
- * All of the URETPROBE behaviors can be tested with either
- * uretprobe attached or not
+ * All of the U(RET)PROBE behaviors can be tested with either
+ * u(ret)probe attached or not
*/
bool attach;
+ /*
+ * Test both uprobe and uretprobe.
+ */
+ bool uretprobe;
+};
+
+FIXTURE_VARIANT_ADD(UPROBE, not_attached) {
+ .attach = false,
+ .uretprobe = false,
};
-FIXTURE_VARIANT_ADD(URETPROBE, attached) {
+FIXTURE_VARIANT_ADD(UPROBE, uprobe_attached) {
.attach = true,
+ .uretprobe = false,
};
-FIXTURE_VARIANT_ADD(URETPROBE, not_attached) {
- .attach = false,
+FIXTURE_VARIANT_ADD(UPROBE, uretprobe_attached) {
+ .attach = true,
+ .uretprobe = true,
};
-FIXTURE_SETUP(URETPROBE)
+FIXTURE_SETUP(UPROBE)
{
const size_t attr_sz = sizeof(struct perf_event_attr);
struct perf_event_attr attr;
ssize_t offset;
int type, bit;
-#ifndef __NR_uretprobe
- SKIP(return, "__NR_uretprobe syscall not defined");
+#if !defined(__NR_uprobe) || !defined(__NR_uretprobe)
+ SKIP(return, "__NR_uprobe ot __NR_uretprobe syscalls not defined");
#endif
if (!variant->attach)
@@ -4987,12 +5166,17 @@ FIXTURE_SETUP(URETPROBE)
type = determine_uprobe_perf_type();
ASSERT_GE(type, 0);
- bit = determine_uprobe_retprobe_bit();
- ASSERT_GE(bit, 0);
- offset = get_uprobe_offset(probed);
+
+ if (variant->uretprobe) {
+ bit = determine_uprobe_retprobe_bit();
+ ASSERT_GE(bit, 0);
+ }
+
+ offset = get_uprobe_offset(variant->uretprobe ? probed_uretprobe : probed_uprobe);
ASSERT_GE(offset, 0);
- attr.config |= 1 << bit;
+ if (variant->uretprobe)
+ attr.config |= 1 << bit;
attr.size = attr_sz;
attr.type = type;
attr.config1 = ptr_to_u64("/proc/self/exe");
@@ -5003,7 +5187,7 @@ FIXTURE_SETUP(URETPROBE)
PERF_FLAG_FD_CLOEXEC);
}
-FIXTURE_TEARDOWN(URETPROBE)
+FIXTURE_TEARDOWN(UPROBE)
{
/* we could call close(self->fd), but we'd need extra filter for
* that and since we are calling _exit right away..
@@ -5017,11 +5201,17 @@ static int run_probed_with_filter(struct sock_fprog *prog)
return -1;
}
- probed();
+ /*
+ * Uprobe is optimized after first hit, so let's hit twice.
+ */
+ probed_uprobe();
+ probed_uprobe();
+
+ probed_uretprobe();
return 0;
}
-TEST_F(URETPROBE, uretprobe_default_allow)
+TEST_F(UPROBE, uprobe_default_allow)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
@@ -5034,7 +5224,7 @@ TEST_F(URETPROBE, uretprobe_default_allow)
ASSERT_EQ(0, run_probed_with_filter(&prog));
}
-TEST_F(URETPROBE, uretprobe_default_block)
+TEST_F(UPROBE, uprobe_default_block)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
@@ -5051,11 +5241,14 @@ TEST_F(URETPROBE, uretprobe_default_block)
ASSERT_EQ(0, run_probed_with_filter(&prog));
}
-TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall)
+TEST_F(UPROBE, uprobe_block_syscall)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 1, 2),
+#endif
#ifdef __NR_uretprobe
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1),
#endif
@@ -5070,11 +5263,14 @@ TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall)
ASSERT_EQ(0, run_probed_with_filter(&prog));
}
-TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall)
+TEST_F(UPROBE, uprobe_default_block_with_syscall)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uprobe
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uprobe, 3, 0),
+#endif
#ifdef __NR_uretprobe
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0),
#endif
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 23a61e5b99d0..998e5a2f4579 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -186,6 +186,204 @@
]
},
{
+ "id": "34c0",
+ "name": "Test TBF with HHF Backlog Accounting in gso_skb case against underflow",
+ "category": [
+ "qdisc",
+ "tbf",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms",
+ "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 hhf limit 1000",
+ [
+ "ping -I $DUMMY -c2 10.10.11.11",
+ 1
+ ],
+ "$TC qdisc change dev $DUMMY handle 2: parent 1:1 hhf limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "backlog 0b 0p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "fd68",
+ "name": "Test TBF with CODEL Backlog Accounting in gso_skb case against underflow",
+ "category": [
+ "qdisc",
+ "tbf",
+ "codel"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms",
+ "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 codel limit 1000",
+ [
+ "ping -I $DUMMY -c2 10.10.11.11",
+ 1
+ ],
+ "$TC qdisc change dev $DUMMY handle 2: parent 1:1 codel limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "backlog 0b 0p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "514e",
+ "name": "Test TBF with PIE Backlog Accounting in gso_skb case against underflow",
+ "category": [
+ "qdisc",
+ "tbf",
+ "pie"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms",
+ "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 pie limit 1000",
+ [
+ "ping -I $DUMMY -c2 10.10.11.11",
+ 1
+ ],
+ "$TC qdisc change dev $DUMMY handle 2: parent 1:1 pie limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "backlog 0b 0p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6c97",
+ "name": "Test TBF with FQ Backlog Accounting in gso_skb case against underflow",
+ "category": [
+ "qdisc",
+ "tbf",
+ "fq"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms",
+ "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq limit 1000",
+ [
+ "ping -I $DUMMY -c2 10.10.11.11",
+ 1
+ ],
+ "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "backlog 0b 0p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5d0b",
+ "name": "Test TBF with FQ_CODEL Backlog Accounting in gso_skb case against underflow",
+ "category": [
+ "qdisc",
+ "tbf",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms",
+ "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_codel limit 1000",
+ [
+ "ping -I $DUMMY -c2 10.10.11.11",
+ 1
+ ],
+ "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_codel limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "backlog 0b 0p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "21c3",
+ "name": "Test TBF with FQ_PIE Backlog Accounting in gso_skb case against underflow",
+ "category": [
+ "qdisc",
+ "tbf",
+ "fq_pie"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.11.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms",
+ "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_pie limit 1000",
+ [
+ "ping -I $DUMMY -c2 10.10.11.11",
+ 1
+ ],
+ "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_pie limit 1"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "backlog 0b 0p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
"id": "a4bb",
"name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue",
"category": [
diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c
index 2d93ac860bd5..cd9fe69ecce2 100644
--- a/tools/testing/selftests/ublk/file_backed.c
+++ b/tools/testing/selftests/ublk/file_backed.c
@@ -20,7 +20,7 @@ static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q,
struct io_uring_sqe *sqe[1];
ublk_io_alloc_sqes(t, sqe, 1);
- io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC);
+ io_uring_prep_fsync(sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/, IORING_FSYNC_DATASYNC);
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
/* bit63 marks us as tgt io */
sqe[0]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
@@ -42,7 +42,7 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
if (!sqe[0])
return -ENOMEM;
- io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/,
+ io_uring_prep_rw(op, sqe[0], ublk_get_registered_fd(q, 1) /*fds[1]*/,
addr,
iod->nr_sectors << 9,
iod->start_sector << 9);
@@ -56,19 +56,19 @@ static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
ublk_io_alloc_sqes(t, sqe, 3);
- io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
- io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0,
+ io_uring_prep_rw(op, sqe[1], ublk_get_registered_fd(q, 1) /*fds[1]*/, 0,
iod->nr_sectors << 9,
iod->start_sector << 9);
sqe[1]->buf_index = tag;
sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);
- io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
return 2;
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 95188065b2e9..b71faba86c3b 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -432,7 +432,7 @@ static void ublk_thread_deinit(struct ublk_thread *t)
}
}
-static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
+static int ublk_queue_init(struct ublk_queue *q, unsigned long long extra_flags)
{
struct ublk_dev *dev = q->dev;
int depth = dev->dev_info.queue_depth;
@@ -446,6 +446,9 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
q->flags = dev->dev_info.flags;
q->flags |= extra_flags;
+ /* Cache fd in queue for fast path access */
+ q->ublk_fd = dev->fds[0];
+
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
@@ -481,9 +484,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags)
return -ENOMEM;
}
-static int ublk_thread_init(struct ublk_thread *t)
+static int ublk_thread_init(struct ublk_thread *t, unsigned long long extra_flags)
{
struct ublk_dev *dev = t->dev;
+ unsigned long long flags = dev->dev_info.flags | extra_flags;
int ring_depth = dev->tgt.sq_depth, cq_depth = dev->tgt.cq_depth;
int ret;
@@ -512,7 +516,17 @@ static int ublk_thread_init(struct ublk_thread *t)
io_uring_register_ring_fd(&t->ring);
- ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
+ if (flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
+ /* Register only backing files starting from index 1, exclude ublk control device */
+ if (dev->nr_fds > 1) {
+ ret = io_uring_register_files(&t->ring, &dev->fds[1], dev->nr_fds - 1);
+ } else {
+ /* No backing files to register, skip file registration */
+ ret = 0;
+ }
+ } else {
+ ret = io_uring_register_files(&t->ring, dev->fds, dev->nr_fds);
+ }
if (ret) {
ublk_err("ublk dev %d thread %d register files failed %d\n",
t->dev->dev_info.dev_id, t->idx, ret);
@@ -626,9 +640,12 @@ int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io)
/* These fields should be written once, never change */
ublk_set_sqe_cmd_op(sqe[0], cmd_op);
- sqe[0]->fd = 0; /* dev->fds[0] */
+ sqe[0]->fd = ublk_get_registered_fd(q, 0); /* dev->fds[0] */
sqe[0]->opcode = IORING_OP_URING_CMD;
- sqe[0]->flags = IOSQE_FIXED_FILE;
+ if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
+ sqe[0]->flags = 0; /* Use raw FD, not fixed file */
+ else
+ sqe[0]->flags = IOSQE_FIXED_FILE;
sqe[0]->rw_flags = 0;
cmd->tag = io->tag;
cmd->q_id = q->q_id;
@@ -832,6 +849,7 @@ struct ublk_thread_info {
unsigned idx;
sem_t *ready;
cpu_set_t *affinity;
+ unsigned long long extra_flags;
};
static void *ublk_io_handler_fn(void *data)
@@ -844,7 +862,7 @@ static void *ublk_io_handler_fn(void *data)
t->dev = info->dev;
t->idx = info->idx;
- ret = ublk_thread_init(t);
+ ret = ublk_thread_init(t, info->extra_flags);
if (ret) {
ublk_err("ublk dev %d thread %u init failed\n",
dev_id, t->idx);
@@ -934,6 +952,8 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
if (ctx->auto_zc_fallback)
extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK;
+ if (ctx->no_ublk_fixed_fd)
+ extra_flags |= UBLKS_Q_NO_UBLK_FIXED_FD;
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
@@ -951,6 +971,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
tinfo[i].dev = dev;
tinfo[i].idx = i;
tinfo[i].ready = &ready;
+ tinfo[i].extra_flags = extra_flags;
/*
* If threads are not tied 1:1 to queues, setting thread
@@ -1400,7 +1421,7 @@ static int cmd_dev_get_features(void)
if (!((1ULL << i) & features))
continue;
- if (i < sizeof(feat_map) / sizeof(feat_map[0]))
+ if (i < ARRAY_SIZE(feat_map))
feat = feat_map[i];
else
feat = "unknown";
@@ -1471,13 +1492,13 @@ static void __cmd_create_help(char *exe, bool recovery)
printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
exe, recovery ? "recover" : "add");
printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
- printf("\t[-e 0|1 ] [-i 0|1]\n");
+ printf("\t[-e 0|1 ] [-i 0|1] [--no_ublk_fixed_fd]\n");
printf("\t[--nthreads threads] [--per_io_tasks]\n");
printf("\t[target options] [backfile1] [backfile2] ...\n");
printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
printf("\tdefault: nthreads=nr_queues");
- for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) {
const struct ublk_tgt_ops *ops = tgt_ops_list[i];
if (ops->usage)
@@ -1534,6 +1555,7 @@ int main(int argc, char *argv[])
{ "size", 1, NULL, 's'},
{ "nthreads", 1, NULL, 0 },
{ "per_io_tasks", 0, NULL, 0 },
+ { "no_ublk_fixed_fd", 0, NULL, 0 },
{ 0, 0, 0, 0 }
};
const struct ublk_tgt_ops *ops = NULL;
@@ -1613,6 +1635,8 @@ int main(int argc, char *argv[])
ctx.nthreads = strtol(optarg, NULL, 10);
if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
ctx.per_io_tasks = 1;
+ if (!strcmp(longopts[option_idx].name, "no_ublk_fixed_fd"))
+ ctx.no_ublk_fixed_fd = 1;
break;
case '?':
/*
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 219233f8a053..5e55484fb0aa 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -77,6 +77,7 @@ struct dev_ctx {
unsigned int recovery:1;
unsigned int auto_zc_fallback:1;
unsigned int per_io_tasks:1;
+ unsigned int no_ublk_fixed_fd:1;
int _evtfd;
int _shmid;
@@ -166,7 +167,9 @@ struct ublk_queue {
/* borrow one bit of ublk uapi flags, which may never be used */
#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63)
+#define UBLKS_Q_NO_UBLK_FIXED_FD (1ULL << 62)
__u64 flags;
+ int ublk_fd; /* cached ublk char device fd */
struct ublk_io ios[UBLK_QUEUE_DEPTH];
};
@@ -273,34 +276,48 @@ static inline int ublk_io_alloc_sqes(struct ublk_thread *t,
return nr_sqes;
}
-static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
- int dev_fd, int tag, int q_id, __u64 index)
+static inline int ublk_get_registered_fd(struct ublk_queue *q, int fd_index)
+{
+ if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD) {
+ if (fd_index == 0)
+ /* Return the raw ublk FD for index 0 */
+ return q->ublk_fd;
+ /* Adjust index for backing files (index 1 becomes 0, etc.) */
+ return fd_index - 1;
+ }
+ return fd_index;
+}
+
+static inline void __io_uring_prep_buf_reg_unreg(struct io_uring_sqe *sqe,
+ struct ublk_queue *q, int tag, int q_id, __u64 index)
{
struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+ int dev_fd = ublk_get_registered_fd(q, 0);
io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
sqe->opcode = IORING_OP_URING_CMD;
- sqe->flags |= IOSQE_FIXED_FILE;
- sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
+ if (q->flags & UBLKS_Q_NO_UBLK_FIXED_FD)
+ sqe->flags &= ~IOSQE_FIXED_FILE;
+ else
+ sqe->flags |= IOSQE_FIXED_FILE;
cmd->tag = tag;
cmd->addr = index;
cmd->q_id = q_id;
}
-static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
- int dev_fd, int tag, int q_id, __u64 index)
+static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
+ struct ublk_queue *q, int tag, int q_id, __u64 index)
{
- struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+ __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
+ sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
+}
- io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
- sqe->opcode = IORING_OP_URING_CMD;
- sqe->flags |= IOSQE_FIXED_FILE;
+static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
+ struct ublk_queue *q, int tag, int q_id, __u64 index)
+{
+ __io_uring_prep_buf_reg_unreg(sqe, q, tag, q_id, index);
sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
-
- cmd->tag = tag;
- cmd->addr = index;
- cmd->q_id = q_id;
}
static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index f0e0003a4860..280043f6b689 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -63,7 +63,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q,
ublk_io_alloc_sqes(t, sqe, 3);
- io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
@@ -71,7 +71,7 @@ static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q,
__setup_nop_io(tag, iod, sqe[1], q->q_id);
sqe[1]->flags |= IOSQE_IO_HARDLINK;
- io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
+ io_uring_prep_buf_unregister(sqe[2], q, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);
// buf register is marked as IOSQE_CQE_SKIP_SUCCESS
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 1fb9b7cc281b..791fa8dc1651 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -142,7 +142,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
ublk_io_alloc_sqes(t, sqe, s->nr + extra);
if (zc) {
- io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index);
+ io_uring_prep_buf_register(sqe[0], q, tag, q->q_id, io->buf_index);
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
sqe[0]->user_data = build_user_data(tag,
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
@@ -168,7 +168,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q,
if (zc) {
struct io_uring_sqe *unreg = sqe[s->nr + 1];
- io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index);
+ io_uring_prep_buf_unregister(unreg, q, tag, q->q_id, io->buf_index);
unreg->user_data = build_user_data(
tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1);
}
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
index 40d1437ca298..3f901db4d09d 100755
--- a/tools/testing/selftests/ublk/test_stress_04.sh
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -28,14 +28,14 @@ _create_backfile 0 256M
_create_backfile 1 128M
_create_backfile 2 128M
-ublk_io_and_kill_daemon 8G -t null -q 4 -z &
-ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 8G -t null -q 4 -z --no_ublk_fixed_fd &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -z --no_ublk_fixed_fd "${UBLK_BACKFILES[0]}" &
ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
if _have_feature "AUTO_BUF_REG"; then
ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc &
ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" &
- ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+ ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc --no_ublk_fixed_fd "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback &
fi
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 30d5c8f0e5c7..ba322a353aff 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
vdso_test
vdso_test_abi
-vdso_test_clock_getres
vdso_test_correctness
vdso_test_gettimeofday
vdso_test_getcpu
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 918a2caa070e..e361aca22a74 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -4,7 +4,6 @@ include ../../../scripts/Makefile.arch
TEST_GEN_PROGS := vdso_test_gettimeofday
TEST_GEN_PROGS += vdso_test_getcpu
TEST_GEN_PROGS += vdso_test_abi
-TEST_GEN_PROGS += vdso_test_clock_getres
ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
TEST_GEN_PROGS += vdso_standalone_test_x86
endif
@@ -29,7 +28,6 @@ CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tab
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
-$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers
$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS)
diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h
index bb237d771051..e7205584cbdc 100644
--- a/tools/testing/selftests/vDSO/vdso_call.h
+++ b/tools/testing/selftests/vDSO/vdso_call.h
@@ -44,7 +44,6 @@
register long _r6 asm ("r6"); \
register long _r7 asm ("r7"); \
register long _r8 asm ("r8"); \
- register long _rval asm ("r3"); \
\
LOADARGS_##nr(fn, args); \
\
@@ -54,13 +53,13 @@
" bns+ 1f\n" \
" neg 3, 3\n" \
"1:" \
- : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \
+ : "+r" (_r0), "+r" (_r3), "+r" (_r4), "+r" (_r5), \
"+r" (_r6), "+r" (_r7), "+r" (_r8) \
- : "r" (_rval) \
+ : \
: "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \
"cr6", "cr7", "xer", "lr", "ctr", "memory" \
); \
- _rval; \
+ _r3; \
})
#else
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
index a54424e2336f..238d609a457a 100644
--- a/tools/testing/selftests/vDSO/vdso_test_abi.c
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -26,24 +26,31 @@
static const char *version;
static const char **name;
+/* The same as struct __kernel_timespec */
+struct vdso_timespec64 {
+ uint64_t tv_sec;
+ uint64_t tv_nsec;
+};
+
typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz);
typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
+typedef long (*vdso_clock_gettime64_t)(clockid_t clk_id, struct vdso_timespec64 *ts);
typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
typedef time_t (*vdso_time_t)(time_t *t);
-const char *vdso_clock_name[12] = {
- "CLOCK_REALTIME",
- "CLOCK_MONOTONIC",
- "CLOCK_PROCESS_CPUTIME_ID",
- "CLOCK_THREAD_CPUTIME_ID",
- "CLOCK_MONOTONIC_RAW",
- "CLOCK_REALTIME_COARSE",
- "CLOCK_MONOTONIC_COARSE",
- "CLOCK_BOOTTIME",
- "CLOCK_REALTIME_ALARM",
- "CLOCK_BOOTTIME_ALARM",
- "CLOCK_SGI_CYCLE",
- "CLOCK_TAI",
+static const char * const vdso_clock_name[] = {
+ [CLOCK_REALTIME] = "CLOCK_REALTIME",
+ [CLOCK_MONOTONIC] = "CLOCK_MONOTONIC",
+ [CLOCK_PROCESS_CPUTIME_ID] = "CLOCK_PROCESS_CPUTIME_ID",
+ [CLOCK_THREAD_CPUTIME_ID] = "CLOCK_THREAD_CPUTIME_ID",
+ [CLOCK_MONOTONIC_RAW] = "CLOCK_MONOTONIC_RAW",
+ [CLOCK_REALTIME_COARSE] = "CLOCK_REALTIME_COARSE",
+ [CLOCK_MONOTONIC_COARSE] = "CLOCK_MONOTONIC_COARSE",
+ [CLOCK_BOOTTIME] = "CLOCK_BOOTTIME",
+ [CLOCK_REALTIME_ALARM] = "CLOCK_REALTIME_ALARM",
+ [CLOCK_BOOTTIME_ALARM] = "CLOCK_BOOTTIME_ALARM",
+ [10 /* CLOCK_SGI_CYCLE */] = "CLOCK_SGI_CYCLE",
+ [CLOCK_TAI] = "CLOCK_TAI",
};
static void vdso_test_gettimeofday(void)
@@ -70,6 +77,33 @@ static void vdso_test_gettimeofday(void)
}
}
+static void vdso_test_clock_gettime64(clockid_t clk_id)
+{
+ /* Find clock_gettime64. */
+ vdso_clock_gettime64_t vdso_clock_gettime64 =
+ (vdso_clock_gettime64_t)vdso_sym(version, name[5]);
+
+ if (!vdso_clock_gettime64) {
+ ksft_print_msg("Couldn't find %s\n", name[5]);
+ ksft_test_result_skip("%s %s\n", name[5],
+ vdso_clock_name[clk_id]);
+ return;
+ }
+
+ struct vdso_timespec64 ts;
+ long ret = VDSO_CALL(vdso_clock_gettime64, 2, clk_id, &ts);
+
+ if (ret == 0) {
+ ksft_print_msg("The time is %lld.%06lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ ksft_test_result_pass("%s %s\n", name[5],
+ vdso_clock_name[clk_id]);
+ } else {
+ ksft_test_result_fail("%s %s\n", name[5],
+ vdso_clock_name[clk_id]);
+ }
+}
+
static void vdso_test_clock_gettime(clockid_t clk_id)
{
/* Find clock_gettime. */
@@ -171,23 +205,23 @@ static inline void vdso_test_clock(clockid_t clock_id)
ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]);
vdso_test_clock_gettime(clock_id);
+ vdso_test_clock_gettime64(clock_id);
vdso_test_clock_getres(clock_id);
}
-#define VDSO_TEST_PLAN 16
+#define VDSO_TEST_PLAN 29
int main(int argc, char **argv)
{
unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
ksft_print_header();
- ksft_set_plan(VDSO_TEST_PLAN);
- if (!sysinfo_ehdr) {
- ksft_print_msg("AT_SYSINFO_EHDR is not present!\n");
- return KSFT_SKIP;
- }
+ if (!sysinfo_ehdr)
+ ksft_exit_skip("AT_SYSINFO_EHDR is not present!\n");
+
+ ksft_set_plan(VDSO_TEST_PLAN);
version = versions[VDSO_VERSION];
name = (const char **)&names[VDSO_NAMES];
@@ -198,40 +232,17 @@ int main(int argc, char **argv)
vdso_test_gettimeofday();
-#if _POSIX_TIMERS > 0
-
-#ifdef CLOCK_REALTIME
vdso_test_clock(CLOCK_REALTIME);
-#endif
-
-#ifdef CLOCK_BOOTTIME
vdso_test_clock(CLOCK_BOOTTIME);
-#endif
-
-#ifdef CLOCK_TAI
vdso_test_clock(CLOCK_TAI);
-#endif
-
-#ifdef CLOCK_REALTIME_COARSE
vdso_test_clock(CLOCK_REALTIME_COARSE);
-#endif
-
-#ifdef CLOCK_MONOTONIC
vdso_test_clock(CLOCK_MONOTONIC);
-#endif
-
-#ifdef CLOCK_MONOTONIC_RAW
vdso_test_clock(CLOCK_MONOTONIC_RAW);
-#endif
-
-#ifdef CLOCK_MONOTONIC_COARSE
vdso_test_clock(CLOCK_MONOTONIC_COARSE);
-#endif
-
-#endif
+ vdso_test_clock(CLOCK_PROCESS_CPUTIME_ID);
+ vdso_test_clock(CLOCK_THREAD_CPUTIME_ID);
vdso_test_time();
- ksft_print_cnts();
- return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
deleted file mode 100644
index b5d5f59f725a..000000000000
--- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- * vdso_clock_getres.c: Sample code to test clock_getres.
- * Copyright (c) 2019 Arm Ltd.
- *
- * Compile with:
- * gcc -std=gnu99 vdso_clock_getres.c
- *
- * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit),
- * Power (32-bit and 64-bit), S390x (32-bit and 64-bit).
- * Might work on other architectures.
- */
-
-#define _GNU_SOURCE
-#include <elf.h>
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-#include <sys/auxv.h>
-#include <sys/mman.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-
-#include "../kselftest.h"
-
-static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts)
-{
- long ret;
-
- ret = syscall(SYS_clock_getres, _clkid, _ts);
-
- return ret;
-}
-
-const char *vdso_clock_name[12] = {
- "CLOCK_REALTIME",
- "CLOCK_MONOTONIC",
- "CLOCK_PROCESS_CPUTIME_ID",
- "CLOCK_THREAD_CPUTIME_ID",
- "CLOCK_MONOTONIC_RAW",
- "CLOCK_REALTIME_COARSE",
- "CLOCK_MONOTONIC_COARSE",
- "CLOCK_BOOTTIME",
- "CLOCK_REALTIME_ALARM",
- "CLOCK_BOOTTIME_ALARM",
- "CLOCK_SGI_CYCLE",
- "CLOCK_TAI",
-};
-
-/*
- * This function calls clock_getres in vdso and by system call
- * with different values for clock_id.
- *
- * Example of output:
- *
- * clock_id: CLOCK_REALTIME [PASS]
- * clock_id: CLOCK_BOOTTIME [PASS]
- * clock_id: CLOCK_TAI [PASS]
- * clock_id: CLOCK_REALTIME_COARSE [PASS]
- * clock_id: CLOCK_MONOTONIC [PASS]
- * clock_id: CLOCK_MONOTONIC_RAW [PASS]
- * clock_id: CLOCK_MONOTONIC_COARSE [PASS]
- */
-static inline int vdso_test_clock(unsigned int clock_id)
-{
- struct timespec x, y;
-
- printf("clock_id: %s", vdso_clock_name[clock_id]);
- clock_getres(clock_id, &x);
- syscall_clock_getres(clock_id, &y);
-
- if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) {
- printf(" [FAIL]\n");
- return KSFT_FAIL;
- }
-
- printf(" [PASS]\n");
- return KSFT_PASS;
-}
-
-int main(int argc, char **argv)
-{
- int ret = 0;
-
-#if _POSIX_TIMERS > 0
-
-#ifdef CLOCK_REALTIME
- ret += vdso_test_clock(CLOCK_REALTIME);
-#endif
-
-#ifdef CLOCK_BOOTTIME
- ret += vdso_test_clock(CLOCK_BOOTTIME);
-#endif
-
-#ifdef CLOCK_TAI
- ret += vdso_test_clock(CLOCK_TAI);
-#endif
-
-#ifdef CLOCK_REALTIME_COARSE
- ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
-#endif
-
-#ifdef CLOCK_MONOTONIC
- ret += vdso_test_clock(CLOCK_MONOTONIC);
-#endif
-
-#ifdef CLOCK_MONOTONIC_RAW
- ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
-#endif
-
-#ifdef CLOCK_MONOTONIC_COARSE
- ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
-#endif
-
-#endif
- if (ret > 0)
- return KSFT_FAIL;
-
- return KSFT_PASS;
-}
diff --git a/tools/testing/shared/linux/idr.h b/tools/testing/shared/linux/idr.h
index 4e342f2e37cf..676c5564e33f 100644
--- a/tools/testing/shared/linux/idr.h
+++ b/tools/testing/shared/linux/idr.h
@@ -1 +1,5 @@
+/* Avoid duplicate definitions due to system headers. */
+#ifdef __CONCAT
+#undef __CONCAT
+#endif
#include "../../../../include/linux/idr.h"
diff --git a/tools/tracing/latency/Makefile.config b/tools/tracing/latency/Makefile.config
index 0fe6b50f029b..6efa13e3ca93 100644
--- a/tools/tracing/latency/Makefile.config
+++ b/tools/tracing/latency/Makefile.config
@@ -1,7 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-only
+include $(srctree)/tools/scripts/utilities.mak
+
STOP_ERROR :=
+ifndef ($(NO_LIBTRACEEVENT),1)
+ ifeq ($(call get-executable,$(PKG_CONFIG)),)
+ $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it)
+ endif
+endif
+
define lib_setup
$(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)"))
$(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)"))
diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config
index 5f2231d8d626..07ff5e8f3006 100644
--- a/tools/tracing/rtla/Makefile.config
+++ b/tools/tracing/rtla/Makefile.config
@@ -1,10 +1,18 @@
# SPDX-License-Identifier: GPL-2.0-only
+include $(srctree)/tools/scripts/utilities.mak
+
STOP_ERROR :=
LIBTRACEEVENT_MIN_VERSION = 1.5
LIBTRACEFS_MIN_VERSION = 1.6
+ifndef ($(NO_LIBTRACEEVENT),1)
+ ifeq ($(call get-executable,$(PKG_CONFIG)),)
+ $(error Error: $(PKG_CONFIG) needed by libtraceevent/libtracefs is missing on this system, please install it)
+ endif
+endif
+
define lib_setup
$(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)"))
$(eval LDFLAGS += $(shell sh -c "$(PKG_CONFIG) --libs-only-L lib$(1)"))
diff --git a/tools/tracing/rtla/src/actions.c b/tools/tracing/rtla/src/actions.c
index aaf0808125d7..13ff1934d47c 100644
--- a/tools/tracing/rtla/src/actions.c
+++ b/tools/tracing/rtla/src/actions.c
@@ -49,7 +49,7 @@ actions_destroy(struct actions *self)
static struct action *
actions_new(struct actions *self)
{
- if (self->size >= self->len) {
+ if (self->len >= self->size) {
self->size *= 2;
self->list = realloc(self->list, self->size * sizeof(struct action));
}
@@ -131,7 +131,7 @@ actions_parse(struct actions *self, const char *trigger)
{
enum action_type type = ACTION_NONE;
char *token;
- char trigger_c[strlen(trigger)];
+ char trigger_c[strlen(trigger) + 1];
/* For ACTION_SIGNAL */
int signal = 0, pid = 0;